#ifndef __ASM_PARAVIRT_H
#define __ASM_PARAVIRT_H
/* Various instructions on x86 need to be replaced for
 * para-virtualization: those hooks are defined here. */

#ifdef CONFIG_PARAVIRT
#include <asm/page.h>

/* Bitmask of what can be clobbered: usually at least eax. */
#define CLBR_NONE 0x0
#define CLBR_EAX 0x1
#define CLBR_ECX 0x2
#define CLBR_EDX 0x4
#define CLBR_ANY 0x7

#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <linux/cpumask.h>
#include <asm/kmap_types.h>

struct page;
struct thread_struct;
struct Xgt_desc_struct;
struct tss_struct;
struct mm_struct;
struct desc_struct;

/* General info about the paravirt backend in charge. */
struct pv_info {
	unsigned int kernel_rpl;	/* privilege level the kernel runs at */
	int shared_kernel_pmd;
	int paravirt_enabled;
	const char *name;		/* backend name, printed by banner() */
};

struct pv_init_ops {
	/*
	 * Patch may replace one of the defined code sequences with
	 * arbitrary code, subject to the same register constraints.
	 * This generally means the code is not free to clobber any
	 * registers other than EAX.  The patch function should return
	 * the number of bytes of code generated, as we nop pad the
	 * rest in generic code.
	 */
	unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
			  unsigned long addr, unsigned len);

	/* Basic arch-specific setup */
	void (*arch_setup)(void);
	char *(*memory_setup)(void);
	void (*post_allocator_init)(void);

	/* Print a banner to identify the environment */
	void (*banner)(void);
};


struct pv_lazy_ops {
	/* Set deferred update mode, used for batching operations. */
	void (*enter)(void);
	void (*leave)(void);
};

struct pv_time_ops {
	void (*time_init)(void);

	/* Get and set time of day */
	unsigned long (*get_wallclock)(void);
	int (*set_wallclock)(unsigned long);

	unsigned long long (*sched_clock)(void);
	unsigned long (*get_cpu_khz)(void);
};

struct pv_cpu_ops {
	/* hooks for various privileged instructions */
	unsigned long (*get_debugreg)(int regno);
	void (*set_debugreg)(int regno, unsigned long value);

	void (*clts)(void);

	unsigned long (*read_cr0)(void);
	void (*write_cr0)(unsigned long);

	unsigned long (*read_cr4_safe)(void);
	unsigned long (*read_cr4)(void);
	void (*write_cr4)(unsigned long);

	/* Segment descriptor handling */
	void (*load_tr_desc)(void);
	void (*load_gdt)(const struct Xgt_desc_struct *);
	void (*load_idt)(const struct Xgt_desc_struct *);
	void (*store_gdt)(struct Xgt_desc_struct *);
	void (*store_idt)(struct Xgt_desc_struct *);
	void (*set_ldt)(const void *desc, unsigned entries);
	unsigned long (*store_tr)(void);
	void (*load_tls)(struct thread_struct *t, unsigned int cpu);
	void (*write_ldt_entry)(struct desc_struct *,
				int entrynum, u32 low, u32 high);
	void (*write_gdt_entry)(struct desc_struct *,
				int entrynum, u32 low, u32 high);
	void (*write_idt_entry)(struct desc_struct *,
				int entrynum, u32 low, u32 high);
	void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t);

	void (*set_iopl_mask)(unsigned mask);

	void (*wbinvd)(void);
	void (*io_delay)(void);

	/* cpuid emulation, mostly so that caps bits can be disabled */
	void (*cpuid)(unsigned int *eax, unsigned int *ebx,
		      unsigned int *ecx, unsigned int *edx);

	/* MSR, PMC and TSC operations.
	   err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */
	u64 (*read_msr)(unsigned int msr, int *err);
	int (*write_msr)(unsigned int msr, u64 val);

	u64 (*read_tsc)(void);
	u64 (*read_pmc)(void);

	/* These two are jmp to, not actually called. */
	void (*irq_enable_sysexit)(void);
	void (*iret)(void);

	struct pv_lazy_ops lazy_mode;
};

struct pv_irq_ops {
	void (*init_IRQ)(void);

	/*
	 * Get/set interrupt state.  save_fl and restore_fl are only
	 * expected to use X86_EFLAGS_IF; all other bits
	 * returned from save_fl are undefined, and may be ignored by
	 * restore_fl.
	 */
	unsigned long (*save_fl)(void);
	void (*restore_fl)(unsigned long);
	void (*irq_disable)(void);
	void (*irq_enable)(void);
	void (*safe_halt)(void);
	void (*halt)(void);
};

struct pv_apic_ops {
#ifdef CONFIG_X86_LOCAL_APIC
	/*
	 * Direct APIC operations, principally for VMI.  Ideally
	 * these shouldn't be in this interface.
	 */
	void (*apic_write)(unsigned long reg, unsigned long v);
	void (*apic_write_atomic)(unsigned long reg, unsigned long v);
	unsigned long (*apic_read)(unsigned long reg);
	void (*setup_boot_clock)(void);
	void (*setup_secondary_clock)(void);

	void (*startup_ipi_hook)(int phys_apicid,
				 unsigned long start_eip,
				 unsigned long start_esp);
#endif
};

struct pv_mmu_ops {
	/*
	 * Called before/after init_mm pagetable setup. setup_start
	 * may reset %cr3, and may pre-install parts of the pagetable;
	 * pagetable setup is expected to preserve any existing
	 * mapping.
	 */
	void (*pagetable_setup_start)(pgd_t *pgd_base);
	void (*pagetable_setup_done)(pgd_t *pgd_base);

	unsigned long (*read_cr2)(void);
	void (*write_cr2)(unsigned long);

	unsigned long (*read_cr3)(void);
	void (*write_cr3)(unsigned long);

	/*
	 * Hooks for intercepting the creation/use/destruction of an
	 * mm_struct.
	 */
	void (*activate_mm)(struct mm_struct *prev,
			    struct mm_struct *next);
	void (*dup_mmap)(struct mm_struct *oldmm,
			 struct mm_struct *mm);
	void (*exit_mmap)(struct mm_struct *mm);


	/* TLB operations */
	void (*flush_tlb_user)(void);
	void (*flush_tlb_kernel)(void);
	void (*flush_tlb_single)(unsigned long addr);
	void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
				 unsigned long va);

	/* Hooks for allocating/releasing pagetable pages */
	void (*alloc_pt)(struct mm_struct *mm, u32 pfn);
	void (*alloc_pd)(u32 pfn);
	void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
	void (*release_pt)(u32 pfn);
	void (*release_pd)(u32 pfn);

	/* Pagetable manipulation functions */
	void (*set_pte)(pte_t *ptep, pte_t pteval);
	void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, pte_t pteval);
	void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
	void (*pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
	void (*pte_update_defer)(struct mm_struct *mm,
				 unsigned long addr, pte_t *ptep);

#ifdef CONFIG_X86_PAE
	/* PAE ptes are 64 bit, so values cross the 32-bit word size. */
	void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
	void (*set_pte_present)(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte);
	void (*set_pud)(pud_t *pudp, pud_t pudval);
	void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
	void (*pmd_clear)(pmd_t *pmdp);

	unsigned long long (*pte_val)(pte_t);
	unsigned long long (*pmd_val)(pmd_t);
	unsigned long long (*pgd_val)(pgd_t);

	pte_t (*make_pte)(unsigned long long pte);
	pmd_t (*make_pmd)(unsigned long long pmd);
	pgd_t (*make_pgd)(unsigned long long pgd);
#else
	unsigned long (*pte_val)(pte_t);
	unsigned long (*pgd_val)(pgd_t);

	pte_t (*make_pte)(unsigned long pte);
	pgd_t (*make_pgd)(unsigned long pgd);
#endif

#ifdef CONFIG_HIGHPTE
	void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
#endif

	struct pv_lazy_ops lazy_mode;
};

/* This contains all the paravirt structures: we get a convenient
 * number for each function using the offset which we use to indicate
 * what to patch. */
struct paravirt_patch_template
{
	struct pv_init_ops pv_init_ops;
	struct pv_time_ops pv_time_ops;
	struct pv_cpu_ops pv_cpu_ops;
	struct pv_irq_ops pv_irq_ops;
	struct pv_apic_ops pv_apic_ops;
	struct pv_mmu_ops pv_mmu_ops;
};

extern struct pv_info pv_info;
extern struct pv_init_ops pv_init_ops;
extern struct pv_time_ops pv_time_ops;
extern struct pv_cpu_ops pv_cpu_ops;
extern struct pv_irq_ops pv_irq_ops;
extern struct pv_apic_ops pv_apic_ops;
extern struct pv_mmu_ops pv_mmu_ops;

/* Pointer-sized slot index of member x within the patch template;
 * this is the "type" number recorded at each patch site. */
#define PARAVIRT_PATCH(x)					\
	(offsetof(struct paravirt_patch_template, x) / sizeof(void *))

#define paravirt_type(op)				\
	[paravirt_typenum] "i" (PARAVIRT_PATCH(op)),	\
	[paravirt_opptr] "m" (op)
#define paravirt_clobber(clobber)		\
	[paravirt_clobber] "i" (clobber)

/*
 * Generate some code, and mark it as patchable by the
 * apply_paravirt() alternate instruction patcher.
 */
#define _paravirt_alt(insn_string, type, clobber)	\
	"771:\n\t" insn_string "\n" "772:\n"		\
	".pushsection .parainstructions,\"a\"\n"	\
	" .long 771b\n"					\
	" .byte " type "\n"				\
	" .byte 772b-771b\n"				\
	" .short " clobber "\n"				\
	".popsection\n"

/* Generate patchable code, with the default asm parameters.
 */
#define paravirt_alt(insn_string)					\
	_paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")

unsigned paravirt_patch_nop(void);
unsigned paravirt_patch_ignore(unsigned len);
unsigned paravirt_patch_call(void *insnbuf,
			     const void *target, u16 tgt_clobbers,
			     unsigned long addr, u16 site_clobbers,
			     unsigned len);
unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
			    unsigned long addr, unsigned len);
unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
				unsigned long addr, unsigned len);

unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
			      const char *start, const char *end);

int paravirt_disable_iospace(void);

/*
 * This generates an indirect call based on the operation type number.
 * The type number, computed in PARAVIRT_PATCH, is derived from the
 * offset into the paravirt_patch_template structure, and can therefore be
 * freely converted back into a structure offset.
 */
#define PARAVIRT_CALL "call *%[paravirt_opptr];"

/*
 * These macros are intended to wrap calls through one of the paravirt
 * ops structs, so that they can be later identified and patched at
 * runtime.
 *
 * Normally, a call to a pv_op function is a simple indirect call:
 * (paravirt_ops.operations)(args...).
 *
 * Unfortunately, this is a relatively slow operation for modern CPUs,
 * because it cannot necessarily determine what the destination
 * address is.  In this case, the address is a runtime constant, so at
 * the very least we can patch the call to be a simple direct call, or
 * ideally, patch an inline implementation into the callsite.  (Direct
 * calls are essentially free, because the call and return addresses
 * are completely predictable.)
 *
 * These macros rely on the standard gcc "regparm(3)" calling
 * convention, in which the first three arguments are placed in %eax,
 * %edx, %ecx (in that order), and the remaining arguments are placed
 * on the stack.  All caller-save registers (eax,edx,ecx) are expected
 * to be modified (either clobbered or used for return values).
 *
 * The call instruction itself is marked by placing its start address
 * and size into the .parainstructions section, so that
 * apply_paravirt() in arch/i386/kernel/alternative.c can do the
 * appropriate patching under the control of the backend pv_init_ops
 * implementation.
 *
 * Unfortunately there's no way to get gcc to generate the args setup
 * for the call, and then allow the call itself to be generated by an
 * inline asm.  Because of this, we must do the complete arg setup and
 * return value handling from within these macros.  This is fairly
 * cumbersome.
 *
 * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments.
 * It could be extended to more arguments, but there would be little
 * to be gained from that.  For each number of arguments, there are
 * the two VCALL and CALL variants for void and non-void functions.
 *
 * When there is a return value, the invoker of the macro must specify
 * the return type.  The macro then uses sizeof() on that type to
 * determine whether it's a 32 or 64 bit value, and places the return
 * in the right register(s) (just %eax for 32-bit, and %edx:%eax for
 * 64-bit).
 *
 * 64-bit arguments are passed as a pair of adjacent 32-bit arguments
 * in low,high order.
 *
 * Small structures are passed and returned in registers.  The macro
 * calling convention can't directly deal with this, so the wrapper
 * functions must do this.
 *
 * These PVOP_* macros are only defined within this header.  This
 * means that all uses must be wrapped in inline functions.  This also
 * makes sure the incoming and outgoing types are always correct.
 */
#define __PVOP_CALL(rettype, op, pre, post, ...)			\
	({								\
		rettype __ret;						\
		unsigned long __eax, __edx, __ecx;			\
		if (sizeof(rettype) > sizeof(unsigned long)) {		\
			asm volatile(pre				\
				     paravirt_alt(PARAVIRT_CALL)	\
				     post				\
				     : "=a" (__eax), "=d" (__edx),	\
				       "=c" (__ecx)			\
				     : paravirt_type(op),		\
				       paravirt_clobber(CLBR_ANY),	\
				       ##__VA_ARGS__			\
				     : "memory", "cc");			\
			__ret = (rettype)((((u64)__edx) << 32) | __eax); \
		} else {						\
			asm volatile(pre				\
				     paravirt_alt(PARAVIRT_CALL)	\
				     post				\
				     : "=a" (__eax), "=d" (__edx),	\
				       "=c" (__ecx)			\
				     : paravirt_type(op),		\
				       paravirt_clobber(CLBR_ANY),	\
				       ##__VA_ARGS__			\
				     : "memory", "cc");			\
			__ret = (rettype)__eax;				\
		}							\
		__ret;							\
	})
#define __PVOP_VCALL(op, pre, post, ...)				\
	({								\
		unsigned long __eax, __edx, __ecx;			\
		asm volatile(pre					\
			     paravirt_alt(PARAVIRT_CALL)		\
			     post					\
			     : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \
			     : paravirt_type(op),			\
			       paravirt_clobber(CLBR_ANY),		\
			       ##__VA_ARGS__				\
			     : "memory", "cc");				\
	})

#define PVOP_CALL0(rettype, op)						\
	__PVOP_CALL(rettype, op, "", "")
#define PVOP_VCALL0(op)							\
	__PVOP_VCALL(op, "", "")

#define PVOP_CALL1(rettype, op, arg1)					\
	__PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)))
#define PVOP_VCALL1(op, arg1)						\
	__PVOP_VCALL(op, "", "", "0" ((u32)(arg1)))

#define PVOP_CALL2(rettype, op, arg1, arg2)				\
	__PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2)))
#define PVOP_VCALL2(op, arg1, arg2)					\
	__PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2)))

#define PVOP_CALL3(rettype, op, arg1, arg2, arg3)			\
	__PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)),		\
		    "1"((u32)(arg2)), "2"((u32)(arg3)))
#define PVOP_VCALL3(op, arg1, arg2, arg3)				\
	__PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1"((u32)(arg2)),	\
		     "2"((u32)(arg3)))

/* The 4th argument goes on the stack: pushed in "pre", popped via lea
 * in "post" so the flags set by the op are not clobbered. */
#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)			\
	__PVOP_CALL(rettype, op,					\
		    "push %[_arg4];", "lea 4(%%esp),%%esp;",		\
		    "0" ((u32)(arg1)), "1" ((u32)(arg2)),		\
		    "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4)				\
	__PVOP_VCALL(op,						\
		     "push %[_arg4];", "lea 4(%%esp),%%esp;",		\
		     "0" ((u32)(arg1)), "1" ((u32)(arg2)),		\
		     "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))

static inline int paravirt_enabled(void)
{
	return pv_info.paravirt_enabled;
}

static inline void load_esp0(struct tss_struct *tss,
			     struct thread_struct *thread)
{
	PVOP_VCALL2(pv_cpu_ops.load_esp0, tss, thread);
}

#define ARCH_SETUP			pv_init_ops.arch_setup();
static inline unsigned long get_wallclock(void)
{
	return PVOP_CALL0(unsigned long, pv_time_ops.get_wallclock);
}

static inline int set_wallclock(unsigned long nowtime)
{
	return PVOP_CALL1(int, pv_time_ops.set_wallclock, nowtime);
}

static inline void (*choose_time_init(void))(void)
{
	return pv_time_ops.time_init;
}

/* The paravirtualized CPUID instruction.
*/ 475static inline void __cpuid(unsigned int *eax, unsigned int *ebx, 476 unsigned int *ecx, unsigned int *edx) 477{ 478 PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx); 479} 480 481/* 482 * These special macros can be used to get or set a debugging register 483 */ 484static inline unsigned long paravirt_get_debugreg(int reg) 485{ 486 return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg); 487} 488#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) 489static inline void set_debugreg(unsigned long val, int reg) 490{ 491 PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val); 492} 493 494static inline void clts(void) 495{ 496 PVOP_VCALL0(pv_cpu_ops.clts); 497} 498 499static inline unsigned long read_cr0(void) 500{ 501 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0); 502} 503 504static inline void write_cr0(unsigned long x) 505{ 506 PVOP_VCALL1(pv_cpu_ops.write_cr0, x); 507} 508 509static inline unsigned long read_cr2(void) 510{ 511 return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2); 512} 513 514static inline void write_cr2(unsigned long x) 515{ 516 PVOP_VCALL1(pv_mmu_ops.write_cr2, x); 517} 518 519static inline unsigned long read_cr3(void) 520{ 521 return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3); 522} 523 524static inline void write_cr3(unsigned long x) 525{ 526 PVOP_VCALL1(pv_mmu_ops.write_cr3, x); 527} 528 529static inline unsigned long read_cr4(void) 530{ 531 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4); 532} 533static inline unsigned long read_cr4_safe(void) 534{ 535 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe); 536} 537 538static inline void write_cr4(unsigned long x) 539{ 540 PVOP_VCALL1(pv_cpu_ops.write_cr4, x); 541} 542 543static inline void raw_safe_halt(void) 544{ 545 PVOP_VCALL0(pv_irq_ops.safe_halt); 546} 547 548static inline void halt(void) 549{ 550 PVOP_VCALL0(pv_irq_ops.safe_halt); 551} 552 553static inline void wbinvd(void) 554{ 555 PVOP_VCALL0(pv_cpu_ops.wbinvd); 556} 557 558#define 
get_kernel_rpl() (pv_info.kernel_rpl) 559 560static inline u64 paravirt_read_msr(unsigned msr, int *err) 561{ 562 return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); 563} 564static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) 565{ 566 return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); 567} 568 569/* These should all do BUG_ON(_err), but our headers are too tangled. */ 570#define rdmsr(msr,val1,val2) do { \ 571 int _err; \ 572 u64 _l = paravirt_read_msr(msr, &_err); \ 573 val1 = (u32)_l; \ 574 val2 = _l >> 32; \ 575} while(0) 576 577#define wrmsr(msr,val1,val2) do { \ 578 paravirt_write_msr(msr, val1, val2); \ 579} while(0) 580 581#define rdmsrl(msr,val) do { \ 582 int _err; \ 583 val = paravirt_read_msr(msr, &_err); \ 584} while(0) 585 586#define wrmsrl(msr,val) wrmsr(msr, (u32)((u64)(val)), ((u64)(val))>>32) 587#define wrmsr_safe(msr,a,b) paravirt_write_msr(msr, a, b) 588 589/* rdmsr with exception handling */ 590#define rdmsr_safe(msr,a,b) ({ \ 591 int _err; \ 592 u64 _l = paravirt_read_msr(msr, &_err); \ 593 (*a) = (u32)_l; \ 594 (*b) = _l >> 32; \ 595 _err; }) 596 597 598static inline u64 paravirt_read_tsc(void) 599{ 600 return PVOP_CALL0(u64, pv_cpu_ops.read_tsc); 601} 602 603#define rdtscl(low) do { \ 604 u64 _l = paravirt_read_tsc(); \ 605 low = (int)_l; \ 606} while(0) 607 608#define rdtscll(val) (val = paravirt_read_tsc()) 609 610static inline unsigned long long paravirt_sched_clock(void) 611{ 612 return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); 613} 614#define calculate_cpu_khz() (pv_time_ops.get_cpu_khz()) 615 616#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) 617 618static inline unsigned long long paravirt_read_pmc(int counter) 619{ 620 return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter); 621} 622 623#define rdpmc(counter,low,high) do { \ 624 u64 _l = paravirt_read_pmc(counter); \ 625 low = (u32)_l; \ 626 high = _l >> 32; \ 627} while(0) 628 629static inline void load_TR_desc(void) 630{ 631 
PVOP_VCALL0(pv_cpu_ops.load_tr_desc); 632} 633static inline void load_gdt(const struct Xgt_desc_struct *dtr) 634{ 635 PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr); 636} 637static inline void load_idt(const struct Xgt_desc_struct *dtr) 638{ 639 PVOP_VCALL1(pv_cpu_ops.load_idt, dtr); 640} 641static inline void set_ldt(const void *addr, unsigned entries) 642{ 643 PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries); 644} 645static inline void store_gdt(struct Xgt_desc_struct *dtr) 646{ 647 PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr); 648} 649static inline void store_idt(struct Xgt_desc_struct *dtr) 650{ 651 PVOP_VCALL1(pv_cpu_ops.store_idt, dtr); 652} 653static inline unsigned long paravirt_store_tr(void) 654{ 655 return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr); 656} 657#define store_tr(tr) ((tr) = paravirt_store_tr()) 658static inline void load_TLS(struct thread_struct *t, unsigned cpu) 659{ 660 PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu); 661} 662static inline void write_ldt_entry(void *dt, int entry, u32 low, u32 high) 663{ 664 PVOP_VCALL4(pv_cpu_ops.write_ldt_entry, dt, entry, low, high); 665} 666static inline void write_gdt_entry(void *dt, int entry, u32 low, u32 high) 667{ 668 PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, low, high); 669} 670static inline void write_idt_entry(void *dt, int entry, u32 low, u32 high) 671{ 672 PVOP_VCALL4(pv_cpu_ops.write_idt_entry, dt, entry, low, high); 673} 674static inline void set_iopl_mask(unsigned mask) 675{ 676 PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask); 677} 678 679/* The paravirtualized I/O functions */ 680static inline void slow_down_io(void) { 681 pv_cpu_ops.io_delay(); 682#ifdef REALLY_SLOW_IO 683 pv_cpu_ops.io_delay(); 684 pv_cpu_ops.io_delay(); 685 pv_cpu_ops.io_delay(); 686#endif 687} 688 689#ifdef CONFIG_X86_LOCAL_APIC 690/* 691 * Basic functions accessing APICs. 
 */
static inline void apic_write(unsigned long reg, unsigned long v)
{
	PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
}

static inline void apic_write_atomic(unsigned long reg, unsigned long v)
{
	PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v);
}

static inline unsigned long apic_read(unsigned long reg)
{
	return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
}

static inline void setup_boot_clock(void)
{
	PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
}

static inline void setup_secondary_clock(void)
{
	PVOP_VCALL0(pv_apic_ops.setup_secondary_clock);
}
#endif

static inline void paravirt_post_allocator_init(void)
{
	/* Optional hook: only dispatch if the backend installed one. */
	if (pv_init_ops.post_allocator_init)
		(*pv_init_ops.post_allocator_init)();
}

static inline void paravirt_pagetable_setup_start(pgd_t *base)
{
	(*pv_mmu_ops.pagetable_setup_start)(base);
}

static inline void paravirt_pagetable_setup_done(pgd_t *base)
{
	(*pv_mmu_ops.pagetable_setup_done)(base);
}

#ifdef CONFIG_SMP
static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip,
				    unsigned long start_esp)
{
	PVOP_VCALL3(pv_apic_ops.startup_ipi_hook,
		    phys_apicid, start_eip, start_esp);
}
#endif

static inline void paravirt_activate_mm(struct mm_struct *prev,
					struct mm_struct *next)
{
	PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next);
}

static inline void arch_dup_mmap(struct mm_struct *oldmm,
				 struct mm_struct *mm)
{
	PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm);
}

static inline void arch_exit_mmap(struct mm_struct *mm)
{
	PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm);
}

static inline void __flush_tlb(void)
{
	PVOP_VCALL0(pv_mmu_ops.flush_tlb_user);
}
static inline void __flush_tlb_global(void)
{
	PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
}
static inline void __flush_tlb_single(unsigned long addr)
{
	PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
}

static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
				    unsigned long va)
{
	/* The op takes a pointer; pass the address of our by-value copy. */
	PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
}

static inline void paravirt_alloc_pt(struct mm_struct *mm, unsigned pfn)
{
	PVOP_VCALL2(pv_mmu_ops.alloc_pt, mm, pfn);
}
static inline void paravirt_release_pt(unsigned pfn)
{
	PVOP_VCALL1(pv_mmu_ops.release_pt, pfn);
}

static inline void paravirt_alloc_pd(unsigned pfn)
{
	PVOP_VCALL1(pv_mmu_ops.alloc_pd, pfn);
}

static inline void paravirt_alloc_pd_clone(unsigned pfn, unsigned clonepfn,
					   unsigned start, unsigned count)
{
	PVOP_VCALL4(pv_mmu_ops.alloc_pd_clone, pfn, clonepfn, start, count);
}
static inline void paravirt_release_pd(unsigned pfn)
{
	PVOP_VCALL1(pv_mmu_ops.release_pd, pfn);
}

#ifdef CONFIG_HIGHPTE
static inline void *kmap_atomic_pte(struct page *page, enum km_type type)
{
	unsigned long ret;
	/* Pointer return is shuttled through unsigned long, since the
	 * PVOP macros deal in integer registers. */
	ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type);
	return (void *)ret;
}
#endif

static inline void pte_update(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
}

static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep);
}

#ifdef CONFIG_X86_PAE
/* PAE: 64-bit pte/pmd/pgd values are split into low,high 32-bit
 * argument pairs, and 64-bit returns come back in %edx:%eax. */
static inline pte_t __pte(unsigned long long val)
{
	unsigned long long ret = PVOP_CALL2(unsigned long long,
					    pv_mmu_ops.make_pte,
					    val, val >> 32);
	return (pte_t) { ret, ret >> 32 };
}

static inline pmd_t __pmd(unsigned long long val)
{
	return (pmd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pmd,
				    val, val >> 32) };
}

static inline pgd_t __pgd(unsigned long long val)
{
	return (pgd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pgd,
				    val, val >> 32) };
}

static inline unsigned long long pte_val(pte_t x)
{
	return PVOP_CALL2(unsigned long long, pv_mmu_ops.pte_val,
			  x.pte_low, x.pte_high);
}

static inline unsigned long long pmd_val(pmd_t x)
{
	return PVOP_CALL2(unsigned long long, pv_mmu_ops.pmd_val,
			  x.pmd, x.pmd >> 32);
}

static inline unsigned long long pgd_val(pgd_t x)
{
	return PVOP_CALL2(unsigned long long, pv_mmu_ops.pgd_val,
			  x.pgd, x.pgd >> 32);
}

static inline void set_pte(pte_t *ptep, pte_t pteval)
{
	PVOP_VCALL3(pv_mmu_ops.set_pte, ptep, pteval.pte_low, pteval.pte_high);
}

static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pteval)
{
	/* 5 arg words: too many for the PVOP macros, so call directly
	 * (this site is not patchable). */
	pv_mmu_ops.set_pte_at(mm, addr, ptep, pteval);
}

static inline void set_pte_atomic(pte_t *ptep, pte_t pteval)
{
	PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep,
		    pteval.pte_low, pteval.pte_high);
}

static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, pte_t pte)
{
	/* 5 arg words: too many for the PVOP macros, so call directly. */
	pv_mmu_ops.set_pte_present(mm, addr, ptep, pte);
}

static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
	PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp,
		    pmdval.pmd, pmdval.pmd >> 32);
}

static inline void set_pud(pud_t *pudp, pud_t pudval)
{
	PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
		    pudval.pgd.pgd, pudval.pgd.pgd >> 32);
}

static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep);
}

static inline void pmd_clear(pmd_t *pmdp)
{
	PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp);
}

#else  /* !CONFIG_X86_PAE */

static inline pte_t __pte(unsigned long val)
{
	return (pte_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pte, val) };
}

static inline pgd_t __pgd(unsigned long val)
{
	return (pgd_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pgd, val) };
}

static inline unsigned long pte_val(pte_t x)
{
	return PVOP_CALL1(unsigned long, pv_mmu_ops.pte_val, x.pte_low);
}

static inline unsigned long pgd_val(pgd_t x)
{
	return PVOP_CALL1(unsigned long, pv_mmu_ops.pgd_val, x.pgd);
}

static inline void set_pte(pte_t *ptep, pte_t pteval)
{
	PVOP_VCALL2(pv_mmu_ops.set_pte, ptep, pteval.pte_low);
}

static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pteval)
{
	PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pteval.pte_low);
}

static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
	PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, pmdval.pud.pgd.pgd);
}
#endif	/* CONFIG_X86_PAE */

/* Lazy mode for batching updates / context switch */
enum paravirt_lazy_mode {
	PARAVIRT_LAZY_NONE,
	PARAVIRT_LAZY_MMU,
	PARAVIRT_LAZY_CPU,
};

enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
void paravirt_enter_lazy_cpu(void);
void paravirt_leave_lazy_cpu(void);
void paravirt_enter_lazy_mmu(void);
void paravirt_leave_lazy_mmu(void);
void paravirt_leave_lazy(enum paravirt_lazy_mode mode);

#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE
static inline void arch_enter_lazy_cpu_mode(void)
{
	PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
}

static inline void arch_leave_lazy_cpu_mode(void)
{
	PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
}

static inline void arch_flush_lazy_cpu_mode(void)
{
	/* Flush pending batched updates by bouncing out of and back
	 * into lazy mode. */
	if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) {
		arch_leave_lazy_cpu_mode();
		arch_enter_lazy_cpu_mode();
	}
}


#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
static inline void arch_enter_lazy_mmu_mode(void)
{
	PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter);
}

static inline void arch_leave_lazy_mmu_mode(void)
{
	PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
}

static inline void arch_flush_lazy_mmu_mode(void)
{
	if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) {
		arch_leave_lazy_mmu_mode();
		arch_enter_lazy_mmu_mode();
	}
}

void _paravirt_nop(void);
#define paravirt_nop	((void *)_paravirt_nop)

/* These all sit in the .parainstructions section to tell us what to patch. */
struct paravirt_patch_site {
	u8 *instr;		/* original instructions */
	u8 instrtype;		/* type of this instruction */
	u8 len;			/* length of original instruction */
	u16 clobbers;		/* what registers you may clobber */
};

extern struct paravirt_patch_site __parainstructions[],
	__parainstructions_end[];

static inline unsigned long __raw_local_save_flags(void)
{
	unsigned long f;

	/* ecx/edx are saved by hand so only CLBR_EAX need be declared. */
	asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
				  PARAVIRT_CALL
				  "popl %%edx; popl %%ecx")
		     : "=a"(f)
		     : paravirt_type(pv_irq_ops.save_fl),
		       paravirt_clobber(CLBR_EAX)
		     : "memory", "cc");
	return f;
}

static inline void raw_local_irq_restore(unsigned long f)
{
	asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
				  PARAVIRT_CALL
				  "popl %%edx; popl %%ecx")
		     : "=a"(f)
		     : "0"(f),
		       paravirt_type(pv_irq_ops.restore_fl),
		       paravirt_clobber(CLBR_EAX)
		     : "memory", "cc");
}

static inline void raw_local_irq_disable(void)
{
	asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
				  PARAVIRT_CALL
				  "popl %%edx; popl %%ecx")
		     :
		     : paravirt_type(pv_irq_ops.irq_disable),
		       paravirt_clobber(CLBR_EAX)
		     : "memory", "eax", "cc");
}

static inline void raw_local_irq_enable(void)
{
	asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
				  PARAVIRT_CALL
				  "popl %%edx; popl %%ecx")
		     :
		     : paravirt_type(pv_irq_ops.irq_enable),
		       paravirt_clobber(CLBR_EAX)
		     : "memory", "eax", "cc");
}

static inline unsigned long __raw_local_irq_save(void)
{
	unsigned long f;

	f = __raw_local_save_flags();
	raw_local_irq_disable();
	return f;
}

#define CLI_STRING							\
	_paravirt_alt("pushl %%ecx; pushl %%edx;"			\
		      "call *%[paravirt_cli_opptr];"			\
		      "popl %%edx; popl %%ecx",				\
		      "%c[paravirt_cli_type]", "%c[paravirt_clobber]")

#define STI_STRING							\
	_paravirt_alt("pushl %%ecx; pushl %%edx;"			\
		      "call *%[paravirt_sti_opptr];"			\
		      "popl %%edx; popl %%ecx",				\
		      "%c[paravirt_sti_type]", "%c[paravirt_clobber]")

#define CLI_STI_CLOBBERS , "%eax"
#define CLI_STI_INPUT_ARGS						\
	,								\
	[paravirt_cli_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_disable)), \
	[paravirt_cli_opptr] "m" (pv_irq_ops.irq_disable),		\
	[paravirt_sti_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_enable)), \
	[paravirt_sti_opptr] "m" (pv_irq_ops.irq_enable),		\
	paravirt_clobber(CLBR_EAX)

/* Make sure as little as possible of this mess escapes.
*/ 1096#undef PARAVIRT_CALL 1097#undef __PVOP_CALL 1098#undef __PVOP_VCALL 1099#undef PVOP_VCALL0 1100#undef PVOP_CALL0 1101#undef PVOP_VCALL1 1102#undef PVOP_CALL1 1103#undef PVOP_VCALL2 1104#undef PVOP_CALL2 1105#undef PVOP_VCALL3 1106#undef PVOP_CALL3 1107#undef PVOP_VCALL4 1108#undef PVOP_CALL4 1109 1110#else /* __ASSEMBLY__ */ 1111 1112#define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) 1113 1114#define PARA_SITE(ptype, clobbers, ops) \ 1115771:; \ 1116 ops; \ 1117772:; \ 1118 .pushsection .parainstructions,"a"; \ 1119 .long 771b; \ 1120 .byte ptype; \ 1121 .byte 772b-771b; \ 1122 .short clobbers; \ 1123 .popsection 1124 1125#define INTERRUPT_RETURN \ 1126 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \ 1127 jmp *%cs:pv_cpu_ops+PV_CPU_iret) 1128 1129#define DISABLE_INTERRUPTS(clobbers) \ 1130 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ 1131 pushl %eax; pushl %ecx; pushl %edx; \ 1132 call *%cs:pv_irq_ops+PV_IRQ_irq_disable; \ 1133 popl %edx; popl %ecx; popl %eax) \ 1134 1135#define ENABLE_INTERRUPTS(clobbers) \ 1136 PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ 1137 pushl %eax; pushl %ecx; pushl %edx; \ 1138 call *%cs:pv_irq_ops+PV_IRQ_irq_enable; \ 1139 popl %edx; popl %ecx; popl %eax) 1140 1141#define ENABLE_INTERRUPTS_SYSEXIT \ 1142 PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), CLBR_NONE,\ 1143 jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_sysexit) 1144 1145#define GET_CR0_INTO_EAX \ 1146 push %ecx; push %edx; \ 1147 call *pv_cpu_ops+PV_CPU_read_cr0; \ 1148 pop %edx; pop %ecx 1149 1150#endif /* __ASSEMBLY__ */ 1151#endif /* CONFIG_PARAVIRT */ 1152#endif /* __ASM_PARAVIRT_H */ 1153