#ifndef __ASM_PARAVIRT_H
#define __ASM_PARAVIRT_H
/* Various instructions on x86 need to be replaced for
 * para-virtualization: those hooks are defined here. */

#ifdef CONFIG_PARAVIRT
#include <asm/page.h>

/* Bitmask of what can be clobbered: usually at least eax. */
#define CLBR_NONE 0x0
#define CLBR_EAX 0x1
#define CLBR_ECX 0x2
#define CLBR_EDX 0x4
#define CLBR_ANY 0x7

#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <linux/cpumask.h>
#include <asm/kmap_types.h>

struct page;
struct thread_struct;
struct Xgt_desc_struct;
struct tss_struct;
struct mm_struct;
struct desc_struct;

/* general info */
struct pv_info {
	unsigned int kernel_rpl;
	int shared_kernel_pmd;
	int paravirt_enabled;
	const char *name;
};

struct pv_init_ops {
	/*
	 * Patch may replace one of the defined code sequences with
	 * arbitrary code, subject to the same register constraints.
	 * This generally means the code is not free to clobber any
	 * registers other than EAX.  The patch function should return
	 * the number of bytes of code generated, as we nop-pad the
	 * rest in generic code.  (An illustrative backend sketch
	 * follows this struct.)
	 */
	unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
			  unsigned long addr, unsigned len);

	/* Basic arch-specific setup */
	void (*arch_setup)(void);
	char *(*memory_setup)(void);
	void (*post_allocator_init)(void);

	/* Print a banner to identify the environment */
	void (*banner)(void);
};
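
/*
 * Illustrative only (not part of the interface): a backend's patch hook
 * typically special-cases a few hot operations and falls back to
 * paravirt_patch_default() for the rest.  The names my_patch, my_cli_start
 * and my_cli_end below are hypothetical:
 *
 *	unsigned my_patch(u8 type, u16 clobbers, void *insnbuf,
 *			  unsigned long addr, unsigned len)
 *	{
 *		switch (type) {
 *		case PARAVIRT_PATCH(pv_irq_ops.irq_disable):
 *			return paravirt_patch_insns(insnbuf, len,
 *						    my_cli_start, my_cli_end);
 *		default:
 *			return paravirt_patch_default(type, clobbers, insnbuf,
 *						      addr, len);
 *		}
 *	}
 */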

struct pv_lazy_ops {
	/* Set deferred update mode, used for batching operations. */
	void (*enter)(void);
	void (*leave)(void);
};

struct pv_time_ops {
	void (*time_init)(void);

	/* Get and set time of day */
	unsigned long (*get_wallclock)(void);
	int (*set_wallclock)(unsigned long);

	unsigned long long (*sched_clock)(void);
	unsigned long (*get_cpu_khz)(void);
};

struct pv_cpu_ops {
	/* hooks for various privileged instructions */
	unsigned long (*get_debugreg)(int regno);
	void (*set_debugreg)(int regno, unsigned long value);

	void (*clts)(void);

	unsigned long (*read_cr0)(void);
	void (*write_cr0)(unsigned long);

	unsigned long (*read_cr4_safe)(void);
	unsigned long (*read_cr4)(void);
	void (*write_cr4)(unsigned long);

	/* Segment descriptor handling */
	void (*load_tr_desc)(void);
	void (*load_gdt)(const struct Xgt_desc_struct *);
	void (*load_idt)(const struct Xgt_desc_struct *);
	void (*store_gdt)(struct Xgt_desc_struct *);
	void (*store_idt)(struct Xgt_desc_struct *);
	void (*set_ldt)(const void *desc, unsigned entries);
	unsigned long (*store_tr)(void);
	void (*load_tls)(struct thread_struct *t, unsigned int cpu);
	void (*write_ldt_entry)(struct desc_struct *,
				int entrynum, u32 low, u32 high);
	void (*write_gdt_entry)(struct desc_struct *,
				int entrynum, u32 low, u32 high);
	void (*write_idt_entry)(struct desc_struct *,
				int entrynum, u32 low, u32 high);
	void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t);

	void (*set_iopl_mask)(unsigned mask);

	void (*wbinvd)(void);
	void (*io_delay)(void);

	/* cpuid emulation, mostly so that capability bits can be disabled */
	void (*cpuid)(unsigned int *eax, unsigned int *ebx,
		      unsigned int *ecx, unsigned int *edx);

	/* MSR, PMC and TSC operations.
	   err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */
	u64 (*read_msr)(unsigned int msr, int *err);
	int (*write_msr)(unsigned int msr, u64 val);

	u64 (*read_tsc)(void);
	u64 (*read_pmc)(void);

	/* These two are jumped to, not actually called. */
	void (*irq_enable_sysexit)(void);
	void (*iret)(void);

	struct pv_lazy_ops lazy_mode;
};

struct pv_irq_ops {
	void (*init_IRQ)(void);

	/*
	 * Get/set interrupt state.  save_fl and restore_fl are only
	 * expected to use X86_EFLAGS_IF; all other bits
	 * returned from save_fl are undefined, and may be ignored by
	 * restore_fl.
	 */
	unsigned long (*save_fl)(void);
	void (*restore_fl)(unsigned long);
	void (*irq_disable)(void);
	void (*irq_enable)(void);
	void (*safe_halt)(void);
	void (*halt)(void);
};

struct pv_apic_ops {
#ifdef CONFIG_X86_LOCAL_APIC
	/*
	 * Direct APIC operations, principally for VMI.  Ideally
	 * these shouldn't be in this interface.
	 */
	void (*apic_write)(unsigned long reg, unsigned long v);
	void (*apic_write_atomic)(unsigned long reg, unsigned long v);
	unsigned long (*apic_read)(unsigned long reg);
	void (*setup_boot_clock)(void);
	void (*setup_secondary_clock)(void);

	void (*startup_ipi_hook)(int phys_apicid,
				 unsigned long start_eip,
				 unsigned long start_esp);
#endif
};

struct pv_mmu_ops {
	/*
	 * Called before/after init_mm pagetable setup. setup_start
	 * may reset %cr3, and may pre-install parts of the pagetable;
	 * pagetable setup is expected to preserve any existing
	 * mapping.
	 */
	void (*pagetable_setup_start)(pgd_t *pgd_base);
	void (*pagetable_setup_done)(pgd_t *pgd_base);

	unsigned long (*read_cr2)(void);
	void (*write_cr2)(unsigned long);

	unsigned long (*read_cr3)(void);
	void (*write_cr3)(unsigned long);

	/*
	 * Hooks for intercepting the creation/use/destruction of an
	 * mm_struct.
	 */
	void (*activate_mm)(struct mm_struct *prev,
			    struct mm_struct *next);
	void (*dup_mmap)(struct mm_struct *oldmm,
			 struct mm_struct *mm);
	void (*exit_mmap)(struct mm_struct *mm);

	/* TLB operations */
	void (*flush_tlb_user)(void);
	void (*flush_tlb_kernel)(void);
	void (*flush_tlb_single)(unsigned long addr);
	void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
				 unsigned long va);

	/* Hooks for allocating/releasing pagetable pages */
	void (*alloc_pt)(struct mm_struct *mm, u32 pfn);
	void (*alloc_pd)(u32 pfn);
	void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
	void (*release_pt)(u32 pfn);
	void (*release_pd)(u32 pfn);

	/* Pagetable manipulation functions */
	void (*set_pte)(pte_t *ptep, pte_t pteval);
	void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, pte_t pteval);
	void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
	void (*pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
	void (*pte_update_defer)(struct mm_struct *mm,
				 unsigned long addr, pte_t *ptep);

#ifdef CONFIG_X86_PAE
	void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
	void (*set_pte_present)(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte);
	void (*set_pud)(pud_t *pudp, pud_t pudval);
	void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
	void (*pmd_clear)(pmd_t *pmdp);

	unsigned long long (*pte_val)(pte_t);
	unsigned long long (*pmd_val)(pmd_t);
	unsigned long long (*pgd_val)(pgd_t);

	pte_t (*make_pte)(unsigned long long pte);
	pmd_t (*make_pmd)(unsigned long long pmd);
	pgd_t (*make_pgd)(unsigned long long pgd);
#else
	unsigned long (*pte_val)(pte_t);
	unsigned long (*pgd_val)(pgd_t);

	pte_t (*make_pte)(unsigned long pte);
	pgd_t (*make_pgd)(unsigned long pgd);
#endif

#ifdef CONFIG_HIGHPTE
	void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
#endif

	struct pv_lazy_ops lazy_mode;
};

/* This contains all the paravirt structures: we get a convenient
 * number for each function using the offset which we use to indicate
 * what to patch. */
struct paravirt_patch_template {
	struct pv_init_ops pv_init_ops;
	struct pv_time_ops pv_time_ops;
	struct pv_cpu_ops pv_cpu_ops;
	struct pv_irq_ops pv_irq_ops;
	struct pv_apic_ops pv_apic_ops;
	struct pv_mmu_ops pv_mmu_ops;
};

extern struct pv_info pv_info;
extern struct pv_init_ops pv_init_ops;
extern struct pv_time_ops pv_time_ops;
extern struct pv_cpu_ops pv_cpu_ops;
extern struct pv_irq_ops pv_irq_ops;
extern struct pv_apic_ops pv_apic_ops;
extern struct pv_mmu_ops pv_mmu_ops;

#define PARAVIRT_PATCH(x)					\
	(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
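
/*
 * For example, PARAVIRT_PATCH(pv_cpu_ops.iret) is the word-sized index of
 * the iret slot in struct paravirt_patch_template above.
 */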

#define paravirt_type(op)				\
	[paravirt_typenum] "i" (PARAVIRT_PATCH(op)),	\
	[paravirt_opptr] "m" (op)
#define paravirt_clobber(clobber)		\
	[paravirt_clobber] "i" (clobber)

/*
 * Generate some code, and mark it as patchable by the
 * apply_paravirt() alternate instruction patcher.
 */
#define _paravirt_alt(insn_string, type, clobber)	\
	"771:\n\t" insn_string "\n" "772:\n"		\
	".pushsection .parainstructions,\"a\"\n"	\
	"  .long 771b\n"				\
	"  .byte " type "\n"				\
	"  .byte 772b-771b\n"				\
	"  .short " clobber "\n"			\
	".popsection\n"

/* Generate patchable code, with the default asm parameters. */
#define paravirt_alt(insn_string)					\
	_paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")

unsigned paravirt_patch_nop(void);
unsigned paravirt_patch_ignore(unsigned len);
unsigned paravirt_patch_call(void *insnbuf,
			     const void *target, u16 tgt_clobbers,
			     unsigned long addr, u16 site_clobbers,
			     unsigned len);
unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
			    unsigned long addr, unsigned len);
unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
				unsigned long addr, unsigned len);

unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
			      const char *start, const char *end);

int paravirt_disable_iospace(void);

/*
 * This generates an indirect call based on the operation type number.
 * The type number, computed in PARAVIRT_PATCH, is derived from the
 * offset into the paravirt_patch_template structure, and can therefore be
 * freely converted back into a structure offset.
 */
#define PARAVIRT_CALL	"call *%[paravirt_opptr];"

/*
 * These macros are intended to wrap calls through one of the paravirt
 * ops structs, so that they can be later identified and patched at
 * runtime.
 *
 * Normally, a call to a pv_op function is a simple indirect call:
 * (paravirt_ops.operations)(args...).
 *
 * Unfortunately, this is a relatively slow operation for modern CPUs,
 * because it cannot necessarily determine what the destination
 * address is.  In this case, the address is a runtime constant, so at
 * the very least we can patch the call to be a simple direct call, or
 * ideally, patch an inline implementation into the callsite.  (Direct
 * calls are essentially free, because the call and return addresses
 * are completely predictable.)
 *
 * These macros rely on the standard gcc "regparm(3)" calling
 * convention, in which the first three arguments are placed in %eax,
 * %edx, %ecx (in that order), and the remaining arguments are placed
 * on the stack.  All caller-save registers (eax,edx,ecx) are expected
 * to be modified (either clobbered or used for return values).
 *
 * The call instruction itself is marked by placing its start address
 * and size into the .parainstructions section, so that
 * apply_paravirt() in arch/i386/kernel/alternative.c can do the
 * appropriate patching under the control of the backend pv_init_ops
 * implementation.
 *
 * Unfortunately there's no way to get gcc to generate the args setup
 * for the call, and then allow the call itself to be generated by an
 * inline asm.  Because of this, we must do the complete arg setup and
 * return value handling from within these macros.  This is fairly
 * cumbersome.
 *
 * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments.
 * It could be extended to more arguments, but there would be little
 * to be gained from that.  For each number of arguments, there are
 * the two VCALL and CALL variants for void and non-void functions.
 *
 * When there is a return value, the invoker of the macro must specify
 * the return type.  The macro then uses sizeof() on that type to
 * determine whether it's a 32- or 64-bit value, and places the return
 * in the right register(s) (just %eax for 32-bit, and %edx:%eax for
 * 64-bit).
 *
 * 64-bit arguments are passed as a pair of adjacent 32-bit arguments
 * in low,high order.
 *
 * Small structures are passed and returned in registers.  The macro
 * calling convention can't directly deal with this, so the wrapper
 * functions must do this.
 *
 * These PVOP_* macros are only defined within this header.  This
 * means that all uses must be wrapped in inline functions.  This also
 * makes sure the incoming and outgoing types are always correct.
 */
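/*
 * For example, a one-argument hook returning an unsigned long is wrapped
 * like this (this is paravirt_get_debugreg() from further down in this
 * file):
 *
 *	static inline unsigned long paravirt_get_debugreg(int reg)
 *	{
 *		return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg);
 *	}
 */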
#define __PVOP_CALL(rettype, op, pre, post, ...)			\
	({								\
		rettype __ret;						\
		unsigned long __eax, __edx, __ecx;			\
		if (sizeof(rettype) > sizeof(unsigned long)) {		\
			asm volatile(pre				\
				     paravirt_alt(PARAVIRT_CALL)	\
				     post				\
				     : "=a" (__eax), "=d" (__edx),	\
				       "=c" (__ecx)			\
				     : paravirt_type(op),		\
				       paravirt_clobber(CLBR_ANY),	\
				       ##__VA_ARGS__			\
				     : "memory", "cc");			\
			__ret = (rettype)((((u64)__edx) << 32) | __eax); \
		} else {						\
			asm volatile(pre				\
				     paravirt_alt(PARAVIRT_CALL)	\
				     post				\
				     : "=a" (__eax), "=d" (__edx),	\
				       "=c" (__ecx)			\
				     : paravirt_type(op),		\
				       paravirt_clobber(CLBR_ANY),	\
				       ##__VA_ARGS__			\
				     : "memory", "cc");			\
			__ret = (rettype)__eax;				\
		}							\
		__ret;							\
	})
#define __PVOP_VCALL(op, pre, post, ...)				\
	({								\
		unsigned long __eax, __edx, __ecx;			\
		asm volatile(pre					\
			     paravirt_alt(PARAVIRT_CALL)		\
			     post					\
			     : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \
			     : paravirt_type(op),			\
			       paravirt_clobber(CLBR_ANY),		\
			       ##__VA_ARGS__				\
			     : "memory", "cc");				\
	})

#define PVOP_CALL0(rettype, op)						\
	__PVOP_CALL(rettype, op, "", "")
#define PVOP_VCALL0(op)							\
	__PVOP_VCALL(op, "", "")

#define PVOP_CALL1(rettype, op, arg1)					\
	__PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)))
#define PVOP_VCALL1(op, arg1)						\
	__PVOP_VCALL(op, "", "", "0" ((u32)(arg1)))

#define PVOP_CALL2(rettype, op, arg1, arg2)				\
	__PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2)))
#define PVOP_VCALL2(op, arg1, arg2)					\
	__PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2)))

#define PVOP_CALL3(rettype, op, arg1, arg2, arg3)			\
	__PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)),		\
		    "1"((u32)(arg2)), "2"((u32)(arg3)))
#define PVOP_VCALL3(op, arg1, arg2, arg3)				\
	__PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1"((u32)(arg2)),	\
		     "2"((u32)(arg3)))

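/* Under regparm(3) a fourth argument no longer fits in a register, so the
 * 4-argument variants push it on the stack before the call and use the
 * trailing lea to drop it again afterwards. */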
#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)			\
	__PVOP_CALL(rettype, op,					\
		    "push %[_arg4];", "lea 4(%%esp),%%esp;",		\
		    "0" ((u32)(arg1)), "1" ((u32)(arg2)),		\
		    "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4)				\
	__PVOP_VCALL(op,						\
		    "push %[_arg4];", "lea 4(%%esp),%%esp;",		\
		    "0" ((u32)(arg1)), "1" ((u32)(arg2)),		\
		    "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))

static inline int paravirt_enabled(void)
{
	return pv_info.paravirt_enabled;
}

static inline void load_esp0(struct tss_struct *tss,
			     struct thread_struct *thread)
{
	PVOP_VCALL2(pv_cpu_ops.load_esp0, tss, thread);
}

#define ARCH_SETUP			pv_init_ops.arch_setup();
static inline unsigned long get_wallclock(void)
{
	return PVOP_CALL0(unsigned long, pv_time_ops.get_wallclock);
}

static inline int set_wallclock(unsigned long nowtime)
{
	return PVOP_CALL1(int, pv_time_ops.set_wallclock, nowtime);
}

static inline void (*choose_time_init(void))(void)
{
	return pv_time_ops.time_init;
}

/* The paravirtualized CPUID instruction. */
static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
			   unsigned int *ecx, unsigned int *edx)
{
	PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx);
}

/*
 * These special macros can be used to get or set a debugging register
 */
static inline unsigned long paravirt_get_debugreg(int reg)
{
	return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg);
}
#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg)
static inline void set_debugreg(unsigned long val, int reg)
{
	PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val);
}

static inline void clts(void)
{
	PVOP_VCALL0(pv_cpu_ops.clts);
}

static inline unsigned long read_cr0(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
}

static inline void write_cr0(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr0, x);
}

static inline unsigned long read_cr2(void)
{
	return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2);
}

static inline void write_cr2(unsigned long x)
{
	PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
}

static inline unsigned long read_cr3(void)
{
	return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
}

static inline void write_cr3(unsigned long x)
{
	PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
}

static inline unsigned long read_cr4(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
}
static inline unsigned long read_cr4_safe(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
}

static inline void write_cr4(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
}

static inline void raw_safe_halt(void)
{
	PVOP_VCALL0(pv_irq_ops.safe_halt);
}

static inline void halt(void)
{
	PVOP_VCALL0(pv_irq_ops.halt);
}

static inline void wbinvd(void)
{
	PVOP_VCALL0(pv_cpu_ops.wbinvd);
}

#define get_kernel_rpl()  (pv_info.kernel_rpl)

static inline u64 paravirt_read_msr(unsigned msr, int *err)
{
	return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
}
static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
{
	return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
}

/* These should all do BUG_ON(_err), but our headers are too tangled. */
#define rdmsr(msr,val1,val2) do {		\
	int _err;				\
	u64 _l = paravirt_read_msr(msr, &_err);	\
	val1 = (u32)_l;				\
	val2 = _l >> 32;			\
} while(0)

#define wrmsr(msr,val1,val2) do {		\
	paravirt_write_msr(msr, val1, val2);	\
} while(0)

#define rdmsrl(msr,val) do {			\
	int _err;				\
	val = paravirt_read_msr(msr, &_err);	\
} while(0)

#define wrmsrl(msr,val)		wrmsr(msr, (u32)((u64)(val)), ((u64)(val))>>32)
#define wrmsr_safe(msr,a,b)	paravirt_write_msr(msr, a, b)

/* rdmsr with exception handling */
#define rdmsr_safe(msr,a,b) ({			\
	int _err;				\
	u64 _l = paravirt_read_msr(msr, &_err);	\
	(*a) = (u32)_l;				\
	(*b) = _l >> 32;			\
	_err; })

static inline u64 paravirt_read_tsc(void)
{
	return PVOP_CALL0(u64, pv_cpu_ops.read_tsc);
}

#define rdtscl(low) do {			\
	u64 _l = paravirt_read_tsc();		\
	low = (int)_l;				\
} while(0)

#define rdtscll(val) (val = paravirt_read_tsc())

static inline unsigned long long paravirt_sched_clock(void)
{
	return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
}
#define calculate_cpu_khz() (pv_time_ops.get_cpu_khz())

#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)

static inline unsigned long long paravirt_read_pmc(int counter)
{
	return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter);
}

#define rdpmc(counter,low,high) do {		\
	u64 _l = paravirt_read_pmc(counter);	\
	low = (u32)_l;				\
	high = _l >> 32;			\
} while(0)

static inline void load_TR_desc(void)
{
	PVOP_VCALL0(pv_cpu_ops.load_tr_desc);
}
static inline void load_gdt(const struct Xgt_desc_struct *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr);
}
static inline void load_idt(const struct Xgt_desc_struct *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.load_idt, dtr);
}
static inline void set_ldt(const void *addr, unsigned entries)
{
	PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
}
static inline void store_gdt(struct Xgt_desc_struct *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
}
static inline void store_idt(struct Xgt_desc_struct *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
}
static inline unsigned long paravirt_store_tr(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
}
#define store_tr(tr)	((tr) = paravirt_store_tr())
static inline void load_TLS(struct thread_struct *t, unsigned cpu)
{
	PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
}
static inline void write_ldt_entry(void *dt, int entry, u32 low, u32 high)
{
	PVOP_VCALL4(pv_cpu_ops.write_ldt_entry, dt, entry, low, high);
}
static inline void write_gdt_entry(void *dt, int entry, u32 low, u32 high)
{
	PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, low, high);
}
static inline void write_idt_entry(void *dt, int entry, u32 low, u32 high)
{
	PVOP_VCALL4(pv_cpu_ops.write_idt_entry, dt, entry, low, high);
}
static inline void set_iopl_mask(unsigned mask)
{
	PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask);
}

/* The paravirtualized I/O functions */
static inline void slow_down_io(void)
{
	pv_cpu_ops.io_delay();
#ifdef REALLY_SLOW_IO
	pv_cpu_ops.io_delay();
	pv_cpu_ops.io_delay();
	pv_cpu_ops.io_delay();
#endif
}

#ifdef CONFIG_X86_LOCAL_APIC
/*
 * Basic functions accessing APICs.
 */
static inline void apic_write(unsigned long reg, unsigned long v)
{
	PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
}

static inline void apic_write_atomic(unsigned long reg, unsigned long v)
{
	PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v);
}

static inline unsigned long apic_read(unsigned long reg)
{
	return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
}

static inline void setup_boot_clock(void)
{
	PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
}

static inline void setup_secondary_clock(void)
{
	PVOP_VCALL0(pv_apic_ops.setup_secondary_clock);
}
#endif

static inline void paravirt_post_allocator_init(void)
{
	if (pv_init_ops.post_allocator_init)
		(*pv_init_ops.post_allocator_init)();
}

static inline void paravirt_pagetable_setup_start(pgd_t *base)
{
	(*pv_mmu_ops.pagetable_setup_start)(base);
}

static inline void paravirt_pagetable_setup_done(pgd_t *base)
{
	(*pv_mmu_ops.pagetable_setup_done)(base);
}

#ifdef CONFIG_SMP
static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip,
				    unsigned long start_esp)
{
	PVOP_VCALL3(pv_apic_ops.startup_ipi_hook,
		    phys_apicid, start_eip, start_esp);
}
#endif

static inline void paravirt_activate_mm(struct mm_struct *prev,
					struct mm_struct *next)
{
	PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next);
}

static inline void arch_dup_mmap(struct mm_struct *oldmm,
				 struct mm_struct *mm)
{
	PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm);
}

static inline void arch_exit_mmap(struct mm_struct *mm)
{
	PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm);
}

static inline void __flush_tlb(void)
{
	PVOP_VCALL0(pv_mmu_ops.flush_tlb_user);
}
static inline void __flush_tlb_global(void)
{
	PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
}
static inline void __flush_tlb_single(unsigned long addr)
{
	PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
}

static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
				    unsigned long va)
{
	PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
}

static inline void paravirt_alloc_pt(struct mm_struct *mm, unsigned pfn)
{
	PVOP_VCALL2(pv_mmu_ops.alloc_pt, mm, pfn);
}
static inline void paravirt_release_pt(unsigned pfn)
{
	PVOP_VCALL1(pv_mmu_ops.release_pt, pfn);
}

static inline void paravirt_alloc_pd(unsigned pfn)
{
	PVOP_VCALL1(pv_mmu_ops.alloc_pd, pfn);
}

static inline void paravirt_alloc_pd_clone(unsigned pfn, unsigned clonepfn,
					   unsigned start, unsigned count)
{
	PVOP_VCALL4(pv_mmu_ops.alloc_pd_clone, pfn, clonepfn, start, count);
}
static inline void paravirt_release_pd(unsigned pfn)
{
	PVOP_VCALL1(pv_mmu_ops.release_pd, pfn);
}

#ifdef CONFIG_HIGHPTE
static inline void *kmap_atomic_pte(struct page *page, enum km_type type)
{
	unsigned long ret;
	ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type);
	return (void *)ret;
}
#endif

static inline void pte_update(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
}

static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep);
}

#ifdef CONFIG_X86_PAE
static inline pte_t __pte(unsigned long long val)
{
	unsigned long long ret = PVOP_CALL2(unsigned long long,
					    pv_mmu_ops.make_pte,
					    val, val >> 32);
	return (pte_t) { ret, ret >> 32 };
}

static inline pmd_t __pmd(unsigned long long val)
{
	return (pmd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pmd,
				    val, val >> 32) };
}

static inline pgd_t __pgd(unsigned long long val)
{
	return (pgd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pgd,
				    val, val >> 32) };
}

static inline unsigned long long pte_val(pte_t x)
{
	return PVOP_CALL2(unsigned long long, pv_mmu_ops.pte_val,
			  x.pte_low, x.pte_high);
}

static inline unsigned long long pmd_val(pmd_t x)
{
	return PVOP_CALL2(unsigned long long, pv_mmu_ops.pmd_val,
			  x.pmd, x.pmd >> 32);
}

static inline unsigned long long pgd_val(pgd_t x)
{
	return PVOP_CALL2(unsigned long long, pv_mmu_ops.pgd_val,
			  x.pgd, x.pgd >> 32);
}

static inline void set_pte(pte_t *ptep, pte_t pteval)
{
	PVOP_VCALL3(pv_mmu_ops.set_pte, ptep, pteval.pte_low, pteval.pte_high);
}

static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pteval)
{
	/* 5 arg words: too many for PVOP_VCALL4, so call directly */
	pv_mmu_ops.set_pte_at(mm, addr, ptep, pteval);
}

static inline void set_pte_atomic(pte_t *ptep, pte_t pteval)
{
	PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep,
		    pteval.pte_low, pteval.pte_high);
}

static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, pte_t pte)
{
	/* 5 arg words: too many for PVOP_VCALL4, so call directly */
	pv_mmu_ops.set_pte_present(mm, addr, ptep, pte);
}

static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
	PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp,
		    pmdval.pmd, pmdval.pmd >> 32);
}

static inline void set_pud(pud_t *pudp, pud_t pudval)
{
	PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
		    pudval.pgd.pgd, pudval.pgd.pgd >> 32);
}

static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep);
}

static inline void pmd_clear(pmd_t *pmdp)
{
	PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp);
}

#else  /* !CONFIG_X86_PAE */

static inline pte_t __pte(unsigned long val)
{
	return (pte_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pte, val) };
}

static inline pgd_t __pgd(unsigned long val)
{
	return (pgd_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pgd, val) };
}

static inline unsigned long pte_val(pte_t x)
{
	return PVOP_CALL1(unsigned long, pv_mmu_ops.pte_val, x.pte_low);
}

static inline unsigned long pgd_val(pgd_t x)
{
	return PVOP_CALL1(unsigned long, pv_mmu_ops.pgd_val, x.pgd);
}

static inline void set_pte(pte_t *ptep, pte_t pteval)
{
	PVOP_VCALL2(pv_mmu_ops.set_pte, ptep, pteval.pte_low);
}

static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pteval)
{
	PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pteval.pte_low);
}

static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
	PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, pmdval.pud.pgd.pgd);
}
#endif	/* CONFIG_X86_PAE */

/* Lazy mode for batching updates / context switch */
enum paravirt_lazy_mode {
	PARAVIRT_LAZY_NONE,
	PARAVIRT_LAZY_MMU,
	PARAVIRT_LAZY_CPU,
};

enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
void paravirt_enter_lazy_cpu(void);
void paravirt_leave_lazy_cpu(void);
void paravirt_enter_lazy_mmu(void);
void paravirt_leave_lazy_mmu(void);
void paravirt_leave_lazy(enum paravirt_lazy_mode mode);

#define  __HAVE_ARCH_ENTER_LAZY_CPU_MODE
static inline void arch_enter_lazy_cpu_mode(void)
{
	PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
}

static inline void arch_leave_lazy_cpu_mode(void)
{
	PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
}

static inline void arch_flush_lazy_cpu_mode(void)
{
	if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) {
		arch_leave_lazy_cpu_mode();
		arch_enter_lazy_cpu_mode();
	}
}

#define  __HAVE_ARCH_ENTER_LAZY_MMU_MODE
static inline void arch_enter_lazy_mmu_mode(void)
{
	PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter);
}

static inline void arch_leave_lazy_mmu_mode(void)
{
	PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
}

static inline void arch_flush_lazy_mmu_mode(void)
{
	if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) {
		arch_leave_lazy_mmu_mode();
		arch_enter_lazy_mmu_mode();
	}
}

void _paravirt_nop(void);
#define paravirt_nop	((void *)_paravirt_nop)

/* These all sit in the .parainstructions section to tell us what to patch. */
struct paravirt_patch_site {
	u8 *instr;		/* original instructions */
	u8 instrtype;		/* type of this instruction */
	u8 len;			/* length of original instruction */
	u16 clobbers;		/* what registers you may clobber */
};

extern struct paravirt_patch_site __parainstructions[],
	__parainstructions_end[];
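
/*
 * Roughly how apply_paravirt() in arch/i386/kernel/alternative.c consumes
 * these records (a sketch; buffer handling omitted): for each site p from
 * __parainstructions to __parainstructions_end it copies p->len bytes of
 * the original code into a local buffer, calls
 *
 *	used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
 *				 (unsigned long)p->instr, p->len);
 *
 * and then nop-pads the buffer from 'used' up to p->len before writing it
 * back over the instructions at p->instr.
 */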

static inline unsigned long __raw_local_save_flags(void)
{
	unsigned long f;

	asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
				  PARAVIRT_CALL
				  "popl %%edx; popl %%ecx")
		     : "=a"(f)
		     : paravirt_type(pv_irq_ops.save_fl),
		       paravirt_clobber(CLBR_EAX)
		     : "memory", "cc");
	return f;
}

static inline void raw_local_irq_restore(unsigned long f)
{
	asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
				  PARAVIRT_CALL
				  "popl %%edx; popl %%ecx")
		     : "=a"(f)
		     : "0"(f),
		       paravirt_type(pv_irq_ops.restore_fl),
		       paravirt_clobber(CLBR_EAX)
		     : "memory", "cc");
}

static inline void raw_local_irq_disable(void)
{
	asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
				  PARAVIRT_CALL
				  "popl %%edx; popl %%ecx")
		     :
		     : paravirt_type(pv_irq_ops.irq_disable),
		       paravirt_clobber(CLBR_EAX)
		     : "memory", "eax", "cc");
}

static inline void raw_local_irq_enable(void)
{
	asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
				  PARAVIRT_CALL
				  "popl %%edx; popl %%ecx")
		     :
		     : paravirt_type(pv_irq_ops.irq_enable),
		       paravirt_clobber(CLBR_EAX)
		     : "memory", "eax", "cc");
}

static inline unsigned long __raw_local_irq_save(void)
{
	unsigned long f;

	f = __raw_local_save_flags();
	raw_local_irq_disable();
	return f;
}

#define CLI_STRING							\
	_paravirt_alt("pushl %%ecx; pushl %%edx;"			\
		      "call *%[paravirt_cli_opptr];"			\
		      "popl %%edx; popl %%ecx",				\
		      "%c[paravirt_cli_type]", "%c[paravirt_clobber]")

#define STI_STRING							\
	_paravirt_alt("pushl %%ecx; pushl %%edx;"			\
		      "call *%[paravirt_sti_opptr];"			\
		      "popl %%edx; popl %%ecx",				\
		      "%c[paravirt_sti_type]", "%c[paravirt_clobber]")

#define CLI_STI_CLOBBERS , "%eax"
#define CLI_STI_INPUT_ARGS						\
	,								\
	[paravirt_cli_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_disable)),	\
	[paravirt_cli_opptr] "m" (pv_irq_ops.irq_disable),		\
	[paravirt_sti_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_enable)),	\
	[paravirt_sti_opptr] "m" (pv_irq_ops.irq_enable),		\
	paravirt_clobber(CLBR_EAX)
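
/* CLI_STRING/STI_STRING are spliced into asm elsewhere (e.g. the spinlock
 * slow path); that asm is expected to add CLI_STI_INPUT_ARGS to its input
 * constraints and CLI_STI_CLOBBERS to its clobber list. */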

/* Make sure as little as possible of this mess escapes. */
#undef PARAVIRT_CALL
#undef __PVOP_CALL
#undef __PVOP_VCALL
#undef PVOP_VCALL0
#undef PVOP_CALL0
#undef PVOP_VCALL1
#undef PVOP_CALL1
#undef PVOP_VCALL2
#undef PVOP_CALL2
#undef PVOP_VCALL3
#undef PVOP_CALL3
#undef PVOP_VCALL4
#undef PVOP_CALL4

#else  /* __ASSEMBLY__ */

#define PARA_PATCH(struct, off)	((PARAVIRT_PATCH_##struct + (off)) / 4)

#define PARA_SITE(ptype, clobbers, ops)		\
771:;						\
	ops;					\
772:;						\
	.pushsection .parainstructions,"a";	\
	 .long 771b;				\
	 .byte ptype;				\
	 .byte 772b-771b;			\
	 .short clobbers;			\
	.popsection

#define INTERRUPT_RETURN						\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,	\
		  jmp *%cs:pv_cpu_ops+PV_CPU_iret)

#define DISABLE_INTERRUPTS(clobbers)					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
		  pushl %eax; pushl %ecx; pushl %edx;			\
		  call *%cs:pv_irq_ops+PV_IRQ_irq_disable;		\
		  popl %edx; popl %ecx; popl %eax)

#define ENABLE_INTERRUPTS(clobbers)					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,	\
		  pushl %eax; pushl %ecx; pushl %edx;			\
		  call *%cs:pv_irq_ops+PV_IRQ_irq_enable;		\
		  popl %edx; popl %ecx; popl %eax)

#define ENABLE_INTERRUPTS_SYSEXIT					       \
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), CLBR_NONE,\
		  jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_sysexit)

#define GET_CR0_INTO_EAX			\
	push %ecx; push %edx;			\
	call *pv_cpu_ops+PV_CPU_read_cr0;	\
	pop %edx; pop %ecx

#endif /* __ASSEMBLY__ */
#endif /* CONFIG_PARAVIRT */
#endif	/* __ASM_PARAVIRT_H */