/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
#include <linux/kvm_host.h>

#include "irq.h"
#include "mmu.h"
#include "kvm_cache_regs.h"
#include "x86.h"
#include "cpuid.h"

#include <linux/module.h>
#include <linux/mod_devicetable.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/ftrace_event.h>
#include <linux/slab.h>

#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/kvm_para.h>

#include <asm/virtext.h>
#include "trace.h"

#define __ex(x) __kvm_handle_fault_on_reboot(x)

MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");

static const struct x86_cpu_id svm_cpu_id[] = {
	X86_FEATURE_MATCH(X86_FEATURE_SVM),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);

#define IOPM_ALLOC_ORDER 2
#define MSRPM_ALLOC_ORDER 1

#define SEG_TYPE_LDT 2
#define SEG_TYPE_BUSY_TSS16 3

#define SVM_FEATURE_NPT            (1 <<  0)
#define SVM_FEATURE_LBRV           (1 <<  1)
#define SVM_FEATURE_SVML           (1 <<  2)
#define SVM_FEATURE_NRIP           (1 <<  3)
#define SVM_FEATURE_TSC_RATE       (1 <<  4)
#define SVM_FEATURE_VMCB_CLEAN     (1 <<  5)
#define SVM_FEATURE_FLUSH_ASID     (1 <<  6)
#define SVM_FEATURE_DECODE_ASSIST  (1 <<  7)
#define SVM_FEATURE_PAUSE_FILTER   (1 << 10)

#define NESTED_EXIT_HOST	0	/* Exit handled on host level */
#define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
#define NESTED_EXIT_CONTINUE	2	/* Further checks needed      */

#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))

#define TSC_RATIO_RSVD	0xffffff0000000000ULL
#define TSC_RATIO_MIN	0x0000000000000001ULL
#define TSC_RATIO_MAX	0x000000ffffffffffULL

static bool erratum_383_found __read_mostly;

static const u32 host_save_user_msrs[] = {
#ifdef CONFIG_X86_64
	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
	MSR_FS_BASE,
#endif
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
};

#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)

struct kvm_vcpu;

struct nested_state {
	struct vmcb *hsave;
	u64 hsave_msr;
	u64 vm_cr_msr;
	u64 vmcb;

	/* These are the merged vectors */
	u32 *msrpm;

	/* gpa pointers to the real vectors */
	u64 vmcb_msrpm;
	u64 vmcb_iopm;

	/* A VMEXIT is required but not yet emulated */
	bool exit_required;

	/* cache for intercepts of the guest */
	u32 intercept_cr;
	u32 intercept_dr;
	u32 intercept_exceptions;
	u64 intercept;

	/* Nested Paging related state */
	u64 nested_cr3;
};

#define MSRPM_OFFSETS	16
static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;

/*
 * Set osvw_len to higher value when updated Revision Guides
 * are published and we know what the new status bits are
 */
static uint64_t osvw_len = 4, osvw_status;

struct vcpu_svm {
	struct kvm_vcpu vcpu;
	struct vmcb *vmcb;
	unsigned long vmcb_pa;
	struct svm_cpu_data *svm_data;
	uint64_t asid_generation;
	uint64_t sysenter_esp;
	uint64_t sysenter_eip;

	u64 next_rip;

	u64
host_user_msrs[NR_HOST_SAVE_USER_MSRS]; 141 struct { 142 u16 fs; 143 u16 gs; 144 u16 ldt; 145 u64 gs_base; 146 } host; 147 148 u32 *msrpm; 149 150 ulong nmi_iret_rip; 151 152 struct nested_state nested; 153 154 bool nmi_singlestep; 155 156 unsigned int3_injected; 157 unsigned long int3_rip; 158 u32 apf_reason; 159 160 u64 tsc_ratio; 161}; 162 163static DEFINE_PER_CPU(u64, current_tsc_ratio); 164#define TSC_RATIO_DEFAULT 0x0100000000ULL 165 166#define MSR_INVALID 0xffffffffU 167 168static const struct svm_direct_access_msrs { 169 u32 index; /* Index of the MSR */ 170 bool always; /* True if intercept is always on */ 171} direct_access_msrs[] = { 172 { .index = MSR_STAR, .always = true }, 173 { .index = MSR_IA32_SYSENTER_CS, .always = true }, 174#ifdef CONFIG_X86_64 175 { .index = MSR_GS_BASE, .always = true }, 176 { .index = MSR_FS_BASE, .always = true }, 177 { .index = MSR_KERNEL_GS_BASE, .always = true }, 178 { .index = MSR_LSTAR, .always = true }, 179 { .index = MSR_CSTAR, .always = true }, 180 { .index = MSR_SYSCALL_MASK, .always = true }, 181#endif 182 { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, 183 { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, 184 { .index = MSR_IA32_LASTINTFROMIP, .always = false }, 185 { .index = MSR_IA32_LASTINTTOIP, .always = false }, 186 { .index = MSR_INVALID, .always = false }, 187}; 188 189/* enable NPT for AMD64 and X86 with PAE */ 190#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) 191static bool npt_enabled = true; 192#else 193static bool npt_enabled; 194#endif 195 196/* allow nested paging (virtualized MMU) for all guests */ 197static int npt = true; 198module_param(npt, int, S_IRUGO); 199 200/* allow nested virtualization in KVM/SVM */ 201static int nested = true; 202module_param(nested, int, S_IRUGO); 203 204static void svm_flush_tlb(struct kvm_vcpu *vcpu); 205static void svm_complete_interrupts(struct vcpu_svm *svm); 206 207static int nested_svm_exit_handled(struct vcpu_svm *svm); 208static int nested_svm_intercept(struct vcpu_svm *svm); 209static int nested_svm_vmexit(struct vcpu_svm *svm); 210static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, 211 bool has_error_code, u32 error_code); 212static u64 __scale_tsc(u64 ratio, u64 tsc); 213 214enum { 215 VMCB_INTERCEPTS, /* Intercept vectors, TSC offset, 216 pause filter count */ 217 VMCB_PERM_MAP, /* IOPM Base and MSRPM Base */ 218 VMCB_ASID, /* ASID */ 219 VMCB_INTR, /* int_ctl, int_vector */ 220 VMCB_NPT, /* npt_en, nCR3, gPAT */ 221 VMCB_CR, /* CR0, CR3, CR4, EFER */ 222 VMCB_DR, /* DR6, DR7 */ 223 VMCB_DT, /* GDT, IDT */ 224 VMCB_SEG, /* CS, DS, SS, ES, CPL */ 225 VMCB_CR2, /* CR2 only */ 226 VMCB_LBR, /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */ 227 VMCB_DIRTY_MAX, 228}; 229 230/* TPR and CR2 are always written before VMRUN */ 231#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2)) 232 233static inline void mark_all_dirty(struct vmcb *vmcb) 234{ 235 vmcb->control.clean = 0; 236} 237 238static inline void mark_all_clean(struct vmcb *vmcb) 239{ 240 vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1) 241 & ~VMCB_ALWAYS_DIRTY_MASK; 242} 243 244static inline void mark_dirty(struct vmcb *vmcb, int bit) 245{ 246 vmcb->control.clean &= ~(1 << bit); 247} 248 249static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) 250{ 251 return container_of(vcpu, struct vcpu_svm, vcpu); 252} 253 254static void recalc_intercepts(struct vcpu_svm *svm) 255{ 256 struct vmcb_control_area *c, *h; 257 struct nested_state *g; 258 259 
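	/*
	 * With nesting active, the intercept bits the hardware sees must be
	 * the union of what the host state (hsave) wants and what the L1
	 * guest asked for in its VMCB; the merge below recomputes that
	 * union, and the clean-bit update makes the CPU re-read the
	 * intercept fields on the next VMRUN.
	 */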
mark_dirty(svm->vmcb, VMCB_INTERCEPTS); 260 261 if (!is_guest_mode(&svm->vcpu)) 262 return; 263 264 c = &svm->vmcb->control; 265 h = &svm->nested.hsave->control; 266 g = &svm->nested; 267 268 c->intercept_cr = h->intercept_cr | g->intercept_cr; 269 c->intercept_dr = h->intercept_dr | g->intercept_dr; 270 c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions; 271 c->intercept = h->intercept | g->intercept; 272} 273 274static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm) 275{ 276 if (is_guest_mode(&svm->vcpu)) 277 return svm->nested.hsave; 278 else 279 return svm->vmcb; 280} 281 282static inline void set_cr_intercept(struct vcpu_svm *svm, int bit) 283{ 284 struct vmcb *vmcb = get_host_vmcb(svm); 285 286 vmcb->control.intercept_cr |= (1U << bit); 287 288 recalc_intercepts(svm); 289} 290 291static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit) 292{ 293 struct vmcb *vmcb = get_host_vmcb(svm); 294 295 vmcb->control.intercept_cr &= ~(1U << bit); 296 297 recalc_intercepts(svm); 298} 299 300static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit) 301{ 302 struct vmcb *vmcb = get_host_vmcb(svm); 303 304 return vmcb->control.intercept_cr & (1U << bit); 305} 306 307static inline void set_dr_intercepts(struct vcpu_svm *svm) 308{ 309 struct vmcb *vmcb = get_host_vmcb(svm); 310 311 vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ) 312 | (1 << INTERCEPT_DR1_READ) 313 | (1 << INTERCEPT_DR2_READ) 314 | (1 << INTERCEPT_DR3_READ) 315 | (1 << INTERCEPT_DR4_READ) 316 | (1 << INTERCEPT_DR5_READ) 317 | (1 << INTERCEPT_DR6_READ) 318 | (1 << INTERCEPT_DR7_READ) 319 | (1 << INTERCEPT_DR0_WRITE) 320 | (1 << INTERCEPT_DR1_WRITE) 321 | (1 << INTERCEPT_DR2_WRITE) 322 | (1 << INTERCEPT_DR3_WRITE) 323 | (1 << INTERCEPT_DR4_WRITE) 324 | (1 << INTERCEPT_DR5_WRITE) 325 | (1 << INTERCEPT_DR6_WRITE) 326 | (1 << INTERCEPT_DR7_WRITE); 327 328 recalc_intercepts(svm); 329} 330 331static inline void clr_dr_intercepts(struct vcpu_svm *svm) 332{ 333 struct vmcb *vmcb = get_host_vmcb(svm); 334 335 vmcb->control.intercept_dr = 0; 336 337 recalc_intercepts(svm); 338} 339 340static inline void set_exception_intercept(struct vcpu_svm *svm, int bit) 341{ 342 struct vmcb *vmcb = get_host_vmcb(svm); 343 344 vmcb->control.intercept_exceptions |= (1U << bit); 345 346 recalc_intercepts(svm); 347} 348 349static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit) 350{ 351 struct vmcb *vmcb = get_host_vmcb(svm); 352 353 vmcb->control.intercept_exceptions &= ~(1U << bit); 354 355 recalc_intercepts(svm); 356} 357 358static inline void set_intercept(struct vcpu_svm *svm, int bit) 359{ 360 struct vmcb *vmcb = get_host_vmcb(svm); 361 362 vmcb->control.intercept |= (1ULL << bit); 363 364 recalc_intercepts(svm); 365} 366 367static inline void clr_intercept(struct vcpu_svm *svm, int bit) 368{ 369 struct vmcb *vmcb = get_host_vmcb(svm); 370 371 vmcb->control.intercept &= ~(1ULL << bit); 372 373 recalc_intercepts(svm); 374} 375 376static inline void enable_gif(struct vcpu_svm *svm) 377{ 378 svm->vcpu.arch.hflags |= HF_GIF_MASK; 379} 380 381static inline void disable_gif(struct vcpu_svm *svm) 382{ 383 svm->vcpu.arch.hflags &= ~HF_GIF_MASK; 384} 385 386static inline bool gif_set(struct vcpu_svm *svm) 387{ 388 return !!(svm->vcpu.arch.hflags & HF_GIF_MASK); 389} 390 391static unsigned long iopm_base; 392 393struct kvm_ldttss_desc { 394 u16 limit0; 395 u16 base0; 396 unsigned base1:8, type:5, dpl:2, p:1; 397 unsigned limit1:4, zero0:3, g:1, base2:8; 398 u32 base3; 399 u32 zero1; 400} 
__attribute__((packed)); 401 402struct svm_cpu_data { 403 int cpu; 404 405 u64 asid_generation; 406 u32 max_asid; 407 u32 next_asid; 408 struct kvm_ldttss_desc *tss_desc; 409 410 struct page *save_area; 411}; 412 413static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); 414 415struct svm_init_data { 416 int cpu; 417 int r; 418}; 419 420static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; 421 422#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges) 423#define MSRS_RANGE_SIZE 2048 424#define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) 425 426static u32 svm_msrpm_offset(u32 msr) 427{ 428 u32 offset; 429 int i; 430 431 for (i = 0; i < NUM_MSR_MAPS; i++) { 432 if (msr < msrpm_ranges[i] || 433 msr >= msrpm_ranges[i] + MSRS_IN_RANGE) 434 continue; 435 436 offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */ 437 offset += (i * MSRS_RANGE_SIZE); /* add range offset */ 438 439 /* Now we have the u8 offset - but need the u32 offset */ 440 return offset / 4; 441 } 442 443 /* MSR not in any range */ 444 return MSR_INVALID; 445} 446 447#define MAX_INST_SIZE 15 448 449static inline void clgi(void) 450{ 451 asm volatile (__ex(SVM_CLGI)); 452} 453 454static inline void stgi(void) 455{ 456 asm volatile (__ex(SVM_STGI)); 457} 458 459static inline void invlpga(unsigned long addr, u32 asid) 460{ 461 asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); 462} 463 464static int get_npt_level(void) 465{ 466#ifdef CONFIG_X86_64 467 return PT64_ROOT_LEVEL; 468#else 469 return PT32E_ROOT_LEVEL; 470#endif 471} 472 473static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) 474{ 475 vcpu->arch.efer = efer; 476 if (!npt_enabled && !(efer & EFER_LMA)) 477 efer &= ~EFER_LME; 478 479 to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; 480 mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); 481} 482 483static int is_external_interrupt(u32 info) 484{ 485 info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; 486 return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); 487} 488 489static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu) 490{ 491 struct vcpu_svm *svm = to_svm(vcpu); 492 u32 ret = 0; 493 494 if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) 495 ret = KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; 496 return ret; 497} 498 499static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) 500{ 501 struct vcpu_svm *svm = to_svm(vcpu); 502 503 if (mask == 0) 504 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; 505 else 506 svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK; 507 508} 509 510static void skip_emulated_instruction(struct kvm_vcpu *vcpu) 511{ 512 struct vcpu_svm *svm = to_svm(vcpu); 513 514 if (svm->vmcb->control.next_rip != 0) 515 svm->next_rip = svm->vmcb->control.next_rip; 516 517 if (!svm->next_rip) { 518 if (emulate_instruction(vcpu, EMULTYPE_SKIP) != 519 EMULATE_DONE) 520 printk(KERN_DEBUG "%s: NOP\n", __func__); 521 return; 522 } 523 if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) 524 printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n", 525 __func__, kvm_rip_read(vcpu), svm->next_rip); 526 527 kvm_rip_write(vcpu, svm->next_rip); 528 svm_set_interrupt_shadow(vcpu, 0); 529} 530 531static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, 532 bool has_error_code, u32 error_code, 533 bool reinject) 534{ 535 struct vcpu_svm *svm = to_svm(vcpu); 536 537 /* 538 * If we are within a nested VM we'd better #VMEXIT and let the guest 539 * handle the exception 540 */ 541 if (!reinject && 542 nested_svm_check_exception(svm, nr, has_error_code, error_code)) 543 
return; 544 545 if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) { 546 unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); 547 548 /* 549 * For guest debugging where we have to reinject #BP if some 550 * INT3 is guest-owned: 551 * Emulate nRIP by moving RIP forward. Will fail if injection 552 * raises a fault that is not intercepted. Still better than 553 * failing in all cases. 554 */ 555 skip_emulated_instruction(&svm->vcpu); 556 rip = kvm_rip_read(&svm->vcpu); 557 svm->int3_rip = rip + svm->vmcb->save.cs.base; 558 svm->int3_injected = rip - old_rip; 559 } 560 561 svm->vmcb->control.event_inj = nr 562 | SVM_EVTINJ_VALID 563 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) 564 | SVM_EVTINJ_TYPE_EXEPT; 565 svm->vmcb->control.event_inj_err = error_code; 566} 567 568static void svm_init_erratum_383(void) 569{ 570 u32 low, high; 571 int err; 572 u64 val; 573 574 if (!static_cpu_has_bug(X86_BUG_AMD_TLB_MMATCH)) 575 return; 576 577 /* Use _safe variants to not break nested virtualization */ 578 val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err); 579 if (err) 580 return; 581 582 val |= (1ULL << 47); 583 584 low = lower_32_bits(val); 585 high = upper_32_bits(val); 586 587 native_write_msr_safe(MSR_AMD64_DC_CFG, low, high); 588 589 erratum_383_found = true; 590} 591 592static void svm_init_osvw(struct kvm_vcpu *vcpu) 593{ 594 /* 595 * Guests should see errata 400 and 415 as fixed (assuming that 596 * HLT and IO instructions are intercepted). 597 */ 598 vcpu->arch.osvw.length = (osvw_len >= 3) ? (osvw_len) : 3; 599 vcpu->arch.osvw.status = osvw_status & ~(6ULL); 600 601 /* 602 * By increasing VCPU's osvw.length to 3 we are telling the guest that 603 * all osvw.status bits inside that length, including bit 0 (which is 604 * reserved for erratum 298), are valid. However, if host processor's 605 * osvw_len is 0 then osvw_status[0] carries no information. We need to 606 * be conservative here and therefore we tell the guest that erratum 298 607 * is present (because we really don't know). 
608 */ 609 if (osvw_len == 0 && boot_cpu_data.x86 == 0x10) 610 vcpu->arch.osvw.status |= 1; 611} 612 613static int has_svm(void) 614{ 615 const char *msg; 616 617 if (!cpu_has_svm(&msg)) { 618 printk(KERN_INFO "has_svm: %s\n", msg); 619 return 0; 620 } 621 622 return 1; 623} 624 625static void svm_hardware_disable(void) 626{ 627 /* Make sure we clean up behind us */ 628 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) 629 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); 630 631 cpu_svm_disable(); 632 633 amd_pmu_disable_virt(); 634} 635 636static int svm_hardware_enable(void) 637{ 638 639 struct svm_cpu_data *sd; 640 uint64_t efer; 641 struct desc_ptr gdt_descr; 642 struct desc_struct *gdt; 643 int me = raw_smp_processor_id(); 644 645 rdmsrl(MSR_EFER, efer); 646 if (efer & EFER_SVME) 647 return -EBUSY; 648 649 if (!has_svm()) { 650 pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me); 651 return -EINVAL; 652 } 653 sd = per_cpu(svm_data, me); 654 if (!sd) { 655 pr_err("%s: svm_data is NULL on %d\n", __func__, me); 656 return -EINVAL; 657 } 658 659 sd->asid_generation = 1; 660 sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; 661 sd->next_asid = sd->max_asid + 1; 662 663 native_store_gdt(&gdt_descr); 664 gdt = (struct desc_struct *)gdt_descr.address; 665 sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); 666 667 wrmsrl(MSR_EFER, efer | EFER_SVME); 668 669 wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); 670 671 if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) { 672 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT); 673 __this_cpu_write(current_tsc_ratio, TSC_RATIO_DEFAULT); 674 } 675 676 677 /* 678 * Get OSVW bits. 679 * 680 * Note that it is possible to have a system with mixed processor 681 * revisions and therefore different OSVW bits. If bits are not the same 682 * on different processors then choose the worst case (i.e. if erratum 683 * is present on one processor and not on another then assume that the 684 * erratum is present everywhere). 
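	 *
	 * If the OSVW MSRs cannot be read at all, osvw_len and osvw_status
	 * are cleared below, and svm_init_osvw() then falls back to its
	 * conservative defaults when setting up each vcpu.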
685 */ 686 if (cpu_has(&boot_cpu_data, X86_FEATURE_OSVW)) { 687 uint64_t len, status = 0; 688 int err; 689 690 len = native_read_msr_safe(MSR_AMD64_OSVW_ID_LENGTH, &err); 691 if (!err) 692 status = native_read_msr_safe(MSR_AMD64_OSVW_STATUS, 693 &err); 694 695 if (err) 696 osvw_status = osvw_len = 0; 697 else { 698 if (len < osvw_len) 699 osvw_len = len; 700 osvw_status |= status; 701 osvw_status &= (1ULL << osvw_len) - 1; 702 } 703 } else 704 osvw_status = osvw_len = 0; 705 706 svm_init_erratum_383(); 707 708 amd_pmu_enable_virt(); 709 710 return 0; 711} 712 713static void svm_cpu_uninit(int cpu) 714{ 715 struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id()); 716 717 if (!sd) 718 return; 719 720 per_cpu(svm_data, raw_smp_processor_id()) = NULL; 721 __free_page(sd->save_area); 722 kfree(sd); 723} 724 725static int svm_cpu_init(int cpu) 726{ 727 struct svm_cpu_data *sd; 728 int r; 729 730 sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); 731 if (!sd) 732 return -ENOMEM; 733 sd->cpu = cpu; 734 sd->save_area = alloc_page(GFP_KERNEL); 735 r = -ENOMEM; 736 if (!sd->save_area) 737 goto err_1; 738 739 per_cpu(svm_data, cpu) = sd; 740 741 return 0; 742 743err_1: 744 kfree(sd); 745 return r; 746 747} 748 749static bool valid_msr_intercept(u32 index) 750{ 751 int i; 752 753 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) 754 if (direct_access_msrs[i].index == index) 755 return true; 756 757 return false; 758} 759 760static void set_msr_interception(u32 *msrpm, unsigned msr, 761 int read, int write) 762{ 763 u8 bit_read, bit_write; 764 unsigned long tmp; 765 u32 offset; 766 767 /* 768 * If this warning triggers extend the direct_access_msrs list at the 769 * beginning of the file 770 */ 771 WARN_ON(!valid_msr_intercept(msr)); 772 773 offset = svm_msrpm_offset(msr); 774 bit_read = 2 * (msr & 0x0f); 775 bit_write = 2 * (msr & 0x0f) + 1; 776 tmp = msrpm[offset]; 777 778 BUG_ON(offset == MSR_INVALID); 779 780 read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp); 781 write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp); 782 783 msrpm[offset] = tmp; 784} 785 786static void svm_vcpu_init_msrpm(u32 *msrpm) 787{ 788 int i; 789 790 memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); 791 792 for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { 793 if (!direct_access_msrs[i].always) 794 continue; 795 796 set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1); 797 } 798} 799 800static void add_msr_offset(u32 offset) 801{ 802 int i; 803 804 for (i = 0; i < MSRPM_OFFSETS; ++i) { 805 806 /* Offset already in list? */ 807 if (msrpm_offsets[i] == offset) 808 return; 809 810 /* Slot used by another offset? */ 811 if (msrpm_offsets[i] != MSR_INVALID) 812 continue; 813 814 /* Add offset to list */ 815 msrpm_offsets[i] = offset; 816 817 return; 818 } 819 820 /* 821 * If this BUG triggers the msrpm_offsets table has an overflow. Just 822 * increase MSRPM_OFFSETS in this case. 
 */
	BUG();
}

static void init_msrpm_offsets(void)
{
	int i;

	memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));

	for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
		u32 offset;

		offset = svm_msrpm_offset(direct_access_msrs[i].index);
		BUG_ON(offset == MSR_INVALID);

		add_msr_offset(offset);
	}
}

static void svm_enable_lbrv(struct vcpu_svm *svm)
{
	u32 *msrpm = svm->msrpm;

	svm->vmcb->control.lbr_ctl = 1;
	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
}

static void svm_disable_lbrv(struct vcpu_svm *svm)
{
	u32 *msrpm = svm->msrpm;

	svm->vmcb->control.lbr_ctl = 0;
	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
}

static __init int svm_hardware_setup(void)
{
	int cpu;
	struct page *iopm_pages;
	void *iopm_va;
	int r;

	iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);

	if (!iopm_pages)
		return -ENOMEM;

	iopm_va = page_address(iopm_pages);
	memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
	iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;

	init_msrpm_offsets();

	if (boot_cpu_has(X86_FEATURE_NX))
		kvm_enable_efer_bits(EFER_NX);

	if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
		kvm_enable_efer_bits(EFER_FFXSR);

	if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
		u64 max;

		kvm_has_tsc_control = true;

		/*
		 * Make sure the user can only configure tsc_khz values that
		 * fit into a signed integer.
		 * A min value is not calculated because it will always be 1
		 * on all machines, and a value of 0 is used to disable
		 * tsc-scaling for the vcpu.
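		 *
		 * The ratio programmed into MSR_AMD64_TSC_RATIO is a binary
		 * fixed-point value whose low 32 bits are the fraction
		 * (TSC_RATIO_DEFAULT == 0x0100000000 means 1.0). __scale_tsc()
		 * mirrors that format in software, so scaling the host
		 * tsc_khz by TSC_RATIO_MAX gives the largest guest frequency
		 * we can represent before the signed-integer cap below.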
900 */ 901 max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX)); 902 903 kvm_max_guest_tsc_khz = max; 904 } 905 906 if (nested) { 907 printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); 908 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); 909 } 910 911 for_each_possible_cpu(cpu) { 912 r = svm_cpu_init(cpu); 913 if (r) 914 goto err; 915 } 916 917 if (!boot_cpu_has(X86_FEATURE_NPT)) 918 npt_enabled = false; 919 920 if (npt_enabled && !npt) { 921 printk(KERN_INFO "kvm: Nested Paging disabled\n"); 922 npt_enabled = false; 923 } 924 925 if (npt_enabled) { 926 printk(KERN_INFO "kvm: Nested Paging enabled\n"); 927 kvm_enable_tdp(); 928 } else 929 kvm_disable_tdp(); 930 931 return 0; 932 933err: 934 __free_pages(iopm_pages, IOPM_ALLOC_ORDER); 935 iopm_base = 0; 936 return r; 937} 938 939static __exit void svm_hardware_unsetup(void) 940{ 941 int cpu; 942 943 for_each_possible_cpu(cpu) 944 svm_cpu_uninit(cpu); 945 946 __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); 947 iopm_base = 0; 948} 949 950static void init_seg(struct vmcb_seg *seg) 951{ 952 seg->selector = 0; 953 seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | 954 SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ 955 seg->limit = 0xffff; 956 seg->base = 0; 957} 958 959static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) 960{ 961 seg->selector = 0; 962 seg->attrib = SVM_SELECTOR_P_MASK | type; 963 seg->limit = 0xffff; 964 seg->base = 0; 965} 966 967static u64 __scale_tsc(u64 ratio, u64 tsc) 968{ 969 u64 mult, frac, _tsc; 970 971 mult = ratio >> 32; 972 frac = ratio & ((1ULL << 32) - 1); 973 974 _tsc = tsc; 975 _tsc *= mult; 976 _tsc += (tsc >> 32) * frac; 977 _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32; 978 979 return _tsc; 980} 981 982static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc) 983{ 984 struct vcpu_svm *svm = to_svm(vcpu); 985 u64 _tsc = tsc; 986 987 if (svm->tsc_ratio != TSC_RATIO_DEFAULT) 988 _tsc = __scale_tsc(svm->tsc_ratio, tsc); 989 990 return _tsc; 991} 992 993static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale) 994{ 995 struct vcpu_svm *svm = to_svm(vcpu); 996 u64 ratio; 997 u64 khz; 998 999 /* Guest TSC same frequency as host TSC? */ 1000 if (!scale) { 1001 svm->tsc_ratio = TSC_RATIO_DEFAULT; 1002 return; 1003 } 1004 1005 /* TSC scaling supported? 
*/ 1006 if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { 1007 if (user_tsc_khz > tsc_khz) { 1008 vcpu->arch.tsc_catchup = 1; 1009 vcpu->arch.tsc_always_catchup = 1; 1010 } else 1011 WARN(1, "user requested TSC rate below hardware speed\n"); 1012 return; 1013 } 1014 1015 khz = user_tsc_khz; 1016 1017 /* TSC scaling required - calculate ratio */ 1018 ratio = khz << 32; 1019 do_div(ratio, tsc_khz); 1020 1021 if (ratio == 0 || ratio & TSC_RATIO_RSVD) { 1022 WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n", 1023 user_tsc_khz); 1024 return; 1025 } 1026 svm->tsc_ratio = ratio; 1027} 1028 1029static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu) 1030{ 1031 struct vcpu_svm *svm = to_svm(vcpu); 1032 1033 return svm->vmcb->control.tsc_offset; 1034} 1035 1036static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) 1037{ 1038 struct vcpu_svm *svm = to_svm(vcpu); 1039 u64 g_tsc_offset = 0; 1040 1041 if (is_guest_mode(vcpu)) { 1042 g_tsc_offset = svm->vmcb->control.tsc_offset - 1043 svm->nested.hsave->control.tsc_offset; 1044 svm->nested.hsave->control.tsc_offset = offset; 1045 } else 1046 trace_kvm_write_tsc_offset(vcpu->vcpu_id, 1047 svm->vmcb->control.tsc_offset, 1048 offset); 1049 1050 svm->vmcb->control.tsc_offset = offset + g_tsc_offset; 1051 1052 mark_dirty(svm->vmcb, VMCB_INTERCEPTS); 1053} 1054 1055static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment, bool host) 1056{ 1057 struct vcpu_svm *svm = to_svm(vcpu); 1058 1059 WARN_ON(adjustment < 0); 1060 if (host) 1061 adjustment = svm_scale_tsc(vcpu, adjustment); 1062 1063 svm->vmcb->control.tsc_offset += adjustment; 1064 if (is_guest_mode(vcpu)) 1065 svm->nested.hsave->control.tsc_offset += adjustment; 1066 else 1067 trace_kvm_write_tsc_offset(vcpu->vcpu_id, 1068 svm->vmcb->control.tsc_offset - adjustment, 1069 svm->vmcb->control.tsc_offset); 1070 1071 mark_dirty(svm->vmcb, VMCB_INTERCEPTS); 1072} 1073 1074static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) 1075{ 1076 u64 tsc; 1077 1078 tsc = svm_scale_tsc(vcpu, native_read_tsc()); 1079 1080 return target_tsc - tsc; 1081} 1082 1083static void init_vmcb(struct vcpu_svm *svm) 1084{ 1085 struct vmcb_control_area *control = &svm->vmcb->control; 1086 struct vmcb_save_area *save = &svm->vmcb->save; 1087 1088 svm->vcpu.fpu_active = 1; 1089 svm->vcpu.arch.hflags = 0; 1090 1091 set_cr_intercept(svm, INTERCEPT_CR0_READ); 1092 set_cr_intercept(svm, INTERCEPT_CR3_READ); 1093 set_cr_intercept(svm, INTERCEPT_CR4_READ); 1094 set_cr_intercept(svm, INTERCEPT_CR0_WRITE); 1095 set_cr_intercept(svm, INTERCEPT_CR3_WRITE); 1096 set_cr_intercept(svm, INTERCEPT_CR4_WRITE); 1097 set_cr_intercept(svm, INTERCEPT_CR8_WRITE); 1098 1099 set_dr_intercepts(svm); 1100 1101 set_exception_intercept(svm, PF_VECTOR); 1102 set_exception_intercept(svm, UD_VECTOR); 1103 set_exception_intercept(svm, MC_VECTOR); 1104 1105 set_intercept(svm, INTERCEPT_INTR); 1106 set_intercept(svm, INTERCEPT_NMI); 1107 set_intercept(svm, INTERCEPT_SMI); 1108 set_intercept(svm, INTERCEPT_SELECTIVE_CR0); 1109 set_intercept(svm, INTERCEPT_RDPMC); 1110 set_intercept(svm, INTERCEPT_CPUID); 1111 set_intercept(svm, INTERCEPT_INVD); 1112 set_intercept(svm, INTERCEPT_HLT); 1113 set_intercept(svm, INTERCEPT_INVLPG); 1114 set_intercept(svm, INTERCEPT_INVLPGA); 1115 set_intercept(svm, INTERCEPT_IOIO_PROT); 1116 set_intercept(svm, INTERCEPT_MSR_PROT); 1117 set_intercept(svm, INTERCEPT_TASK_SWITCH); 1118 set_intercept(svm, INTERCEPT_SHUTDOWN); 1119 set_intercept(svm, INTERCEPT_VMRUN); 1120 set_intercept(svm, 
INTERCEPT_VMMCALL); 1121 set_intercept(svm, INTERCEPT_VMLOAD); 1122 set_intercept(svm, INTERCEPT_VMSAVE); 1123 set_intercept(svm, INTERCEPT_STGI); 1124 set_intercept(svm, INTERCEPT_CLGI); 1125 set_intercept(svm, INTERCEPT_SKINIT); 1126 set_intercept(svm, INTERCEPT_WBINVD); 1127 set_intercept(svm, INTERCEPT_MONITOR); 1128 set_intercept(svm, INTERCEPT_MWAIT); 1129 set_intercept(svm, INTERCEPT_XSETBV); 1130 1131 control->iopm_base_pa = iopm_base; 1132 control->msrpm_base_pa = __pa(svm->msrpm); 1133 control->int_ctl = V_INTR_MASKING_MASK; 1134 1135 init_seg(&save->es); 1136 init_seg(&save->ss); 1137 init_seg(&save->ds); 1138 init_seg(&save->fs); 1139 init_seg(&save->gs); 1140 1141 save->cs.selector = 0xf000; 1142 save->cs.base = 0xffff0000; 1143 /* Executable/Readable Code Segment */ 1144 save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK | 1145 SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK; 1146 save->cs.limit = 0xffff; 1147 1148 save->gdtr.limit = 0xffff; 1149 save->idtr.limit = 0xffff; 1150 1151 init_sys_seg(&save->ldtr, SEG_TYPE_LDT); 1152 init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); 1153 1154 svm_set_efer(&svm->vcpu, 0); 1155 save->dr6 = 0xffff0ff0; 1156 kvm_set_rflags(&svm->vcpu, 2); 1157 save->rip = 0x0000fff0; 1158 svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; 1159 1160 /* 1161 * This is the guest-visible cr0 value. 1162 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. 1163 */ 1164 svm->vcpu.arch.cr0 = 0; 1165 (void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET); 1166 1167 save->cr4 = X86_CR4_PAE; 1168 /* rdx = ?? */ 1169 1170 if (npt_enabled) { 1171 /* Setup VMCB for Nested Paging */ 1172 control->nested_ctl = 1; 1173 clr_intercept(svm, INTERCEPT_INVLPG); 1174 clr_exception_intercept(svm, PF_VECTOR); 1175 clr_cr_intercept(svm, INTERCEPT_CR3_READ); 1176 clr_cr_intercept(svm, INTERCEPT_CR3_WRITE); 1177 save->g_pat = 0x0007040600070406ULL; 1178 save->cr3 = 0; 1179 save->cr4 = 0; 1180 } 1181 svm->asid_generation = 0; 1182 1183 svm->nested.vmcb = 0; 1184 svm->vcpu.arch.hflags = 0; 1185 1186 if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { 1187 control->pause_filter_count = 3000; 1188 set_intercept(svm, INTERCEPT_PAUSE); 1189 } 1190 1191 mark_all_dirty(svm->vmcb); 1192 1193 enable_gif(svm); 1194} 1195 1196static void svm_vcpu_reset(struct kvm_vcpu *vcpu) 1197{ 1198 struct vcpu_svm *svm = to_svm(vcpu); 1199 u32 dummy; 1200 u32 eax = 1; 1201 1202 init_vmcb(svm); 1203 1204 kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); 1205 kvm_register_write(vcpu, VCPU_REGS_RDX, eax); 1206} 1207 1208static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) 1209{ 1210 struct vcpu_svm *svm; 1211 struct page *page; 1212 struct page *msrpm_pages; 1213 struct page *hsave_page; 1214 struct page *nested_msrpm_pages; 1215 int err; 1216 1217 svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); 1218 if (!svm) { 1219 err = -ENOMEM; 1220 goto out; 1221 } 1222 1223 svm->tsc_ratio = TSC_RATIO_DEFAULT; 1224 1225 err = kvm_vcpu_init(&svm->vcpu, kvm, id); 1226 if (err) 1227 goto free_svm; 1228 1229 err = -ENOMEM; 1230 page = alloc_page(GFP_KERNEL); 1231 if (!page) 1232 goto uninit; 1233 1234 msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); 1235 if (!msrpm_pages) 1236 goto free_page1; 1237 1238 nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); 1239 if (!nested_msrpm_pages) 1240 goto free_page2; 1241 1242 hsave_page = alloc_page(GFP_KERNEL); 1243 if (!hsave_page) 1244 goto free_page3; 1245 1246 svm->nested.hsave = page_address(hsave_page); 1247 
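	/*
	 * At this point the vcpu owns one VMCB page, one host-save (hsave)
	 * page for nested VMRUN, and two MSR permission bitmaps: the vcpu's
	 * own and the merged one used while the nested guest runs. All of
	 * them are freed again in svm_free_vcpu().
	 */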
	svm->msrpm = page_address(msrpm_pages);
	svm_vcpu_init_msrpm(svm->msrpm);

	svm->nested.msrpm = page_address(nested_msrpm_pages);
	svm_vcpu_init_msrpm(svm->nested.msrpm);

	svm->vmcb = page_address(page);
	clear_page(svm->vmcb);
	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
	svm->asid_generation = 0;
	init_vmcb(svm);

	svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
				   MSR_IA32_APICBASE_ENABLE;
	if (kvm_vcpu_is_bsp(&svm->vcpu))
		svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;

	svm_init_osvw(&svm->vcpu);

	return &svm->vcpu;

free_page3:
	__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
free_page2:
	__free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
free_page1:
	__free_page(page);
uninit:
	kvm_vcpu_uninit(&svm->vcpu);
free_svm:
	kmem_cache_free(kvm_vcpu_cache, svm);
out:
	return ERR_PTR(err);
}

static void svm_free_vcpu(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
	__free_page(virt_to_page(svm->nested.hsave));
	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, svm);
}

static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int i;

	if (unlikely(cpu != vcpu->cpu)) {
		svm->asid_generation = 0;
		mark_all_dirty(svm->vmcb);
	}

#ifdef CONFIG_X86_64
	rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
#endif
	savesegment(fs, svm->host.fs);
	savesegment(gs, svm->host.gs);
	svm->host.ldt = kvm_read_ldt();

	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
		rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);

	if (static_cpu_has(X86_FEATURE_TSCRATEMSR) &&
	    svm->tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
		__this_cpu_write(current_tsc_ratio, svm->tsc_ratio);
		wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio);
	}
}

static void svm_vcpu_put(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int i;

	++vcpu->stat.host_state_reload;
	kvm_load_ldt(svm->host.ldt);
#ifdef CONFIG_X86_64
	loadsegment(fs, svm->host.fs);
	wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
	load_gs_index(svm->host.gs);
#else
#ifdef CONFIG_X86_32_LAZY_GS
	loadsegment(gs, svm->host.gs);
#endif
#endif
	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
		wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
}

static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
{
	return to_svm(vcpu)->vmcb->save.rflags;
}

static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
	/*
	 * Any change of EFLAGS.VM is accompanied by a reload of SS
	 * (caused by either a task switch or an inter-privilege IRET),
	 * so we do not need to update the CPL here.
1353 */ 1354 to_svm(vcpu)->vmcb->save.rflags = rflags; 1355} 1356 1357static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) 1358{ 1359 switch (reg) { 1360 case VCPU_EXREG_PDPTR: 1361 BUG_ON(!npt_enabled); 1362 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); 1363 break; 1364 default: 1365 BUG(); 1366 } 1367} 1368 1369static void svm_set_vintr(struct vcpu_svm *svm) 1370{ 1371 set_intercept(svm, INTERCEPT_VINTR); 1372} 1373 1374static void svm_clear_vintr(struct vcpu_svm *svm) 1375{ 1376 clr_intercept(svm, INTERCEPT_VINTR); 1377} 1378 1379static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg) 1380{ 1381 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save; 1382 1383 switch (seg) { 1384 case VCPU_SREG_CS: return &save->cs; 1385 case VCPU_SREG_DS: return &save->ds; 1386 case VCPU_SREG_ES: return &save->es; 1387 case VCPU_SREG_FS: return &save->fs; 1388 case VCPU_SREG_GS: return &save->gs; 1389 case VCPU_SREG_SS: return &save->ss; 1390 case VCPU_SREG_TR: return &save->tr; 1391 case VCPU_SREG_LDTR: return &save->ldtr; 1392 } 1393 BUG(); 1394 return NULL; 1395} 1396 1397static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg) 1398{ 1399 struct vmcb_seg *s = svm_seg(vcpu, seg); 1400 1401 return s->base; 1402} 1403 1404static void svm_get_segment(struct kvm_vcpu *vcpu, 1405 struct kvm_segment *var, int seg) 1406{ 1407 struct vmcb_seg *s = svm_seg(vcpu, seg); 1408 1409 var->base = s->base; 1410 var->limit = s->limit; 1411 var->selector = s->selector; 1412 var->type = s->attrib & SVM_SELECTOR_TYPE_MASK; 1413 var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1; 1414 var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; 1415 var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1; 1416 var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1; 1417 var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; 1418 var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; 1419 1420 /* 1421 * AMD CPUs circa 2014 track the G bit for all segments except CS. 1422 * However, the SVM spec states that the G bit is not observed by the 1423 * CPU, and some VMware virtual CPUs drop the G bit for all segments. 1424 * So let's synthesize a legal G bit for all segments, this helps 1425 * running KVM nested. It also helps cross-vendor migration, because 1426 * Intel's vmentry has a check on the 'G' bit. 1427 */ 1428 var->g = s->limit > 0xfffff; 1429 1430 /* 1431 * AMD's VMCB does not have an explicit unusable field, so emulate it 1432 * for cross vendor migration purposes by "not present" 1433 */ 1434 var->unusable = !var->present || (var->type == 0); 1435 1436 switch (seg) { 1437 case VCPU_SREG_TR: 1438 /* 1439 * Work around a bug where the busy flag in the tr selector 1440 * isn't exposed 1441 */ 1442 var->type |= 0x2; 1443 break; 1444 case VCPU_SREG_DS: 1445 case VCPU_SREG_ES: 1446 case VCPU_SREG_FS: 1447 case VCPU_SREG_GS: 1448 /* 1449 * The accessed bit must always be set in the segment 1450 * descriptor cache, although it can be cleared in the 1451 * descriptor, the cached bit always remains at 1. Since 1452 * Intel has a check on this, set it here to support 1453 * cross-vendor migration. 1454 */ 1455 if (!var->unusable) 1456 var->type |= 0x1; 1457 break; 1458 case VCPU_SREG_SS: 1459 /* 1460 * On AMD CPUs sometimes the DB bit in the segment 1461 * descriptor is left as 1, although the whole segment has 1462 * been made unusable. Clear it here to pass an Intel VMX 1463 * entry check when cross vendor migrating. 
1464 */ 1465 if (var->unusable) 1466 var->db = 0; 1467 var->dpl = to_svm(vcpu)->vmcb->save.cpl; 1468 break; 1469 } 1470} 1471 1472static int svm_get_cpl(struct kvm_vcpu *vcpu) 1473{ 1474 struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save; 1475 1476 return save->cpl; 1477} 1478 1479static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 1480{ 1481 struct vcpu_svm *svm = to_svm(vcpu); 1482 1483 dt->size = svm->vmcb->save.idtr.limit; 1484 dt->address = svm->vmcb->save.idtr.base; 1485} 1486 1487static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 1488{ 1489 struct vcpu_svm *svm = to_svm(vcpu); 1490 1491 svm->vmcb->save.idtr.limit = dt->size; 1492 svm->vmcb->save.idtr.base = dt->address ; 1493 mark_dirty(svm->vmcb, VMCB_DT); 1494} 1495 1496static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 1497{ 1498 struct vcpu_svm *svm = to_svm(vcpu); 1499 1500 dt->size = svm->vmcb->save.gdtr.limit; 1501 dt->address = svm->vmcb->save.gdtr.base; 1502} 1503 1504static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) 1505{ 1506 struct vcpu_svm *svm = to_svm(vcpu); 1507 1508 svm->vmcb->save.gdtr.limit = dt->size; 1509 svm->vmcb->save.gdtr.base = dt->address ; 1510 mark_dirty(svm->vmcb, VMCB_DT); 1511} 1512 1513static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) 1514{ 1515} 1516 1517static void svm_decache_cr3(struct kvm_vcpu *vcpu) 1518{ 1519} 1520 1521static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) 1522{ 1523} 1524 1525static void update_cr0_intercept(struct vcpu_svm *svm) 1526{ 1527 ulong gcr0 = svm->vcpu.arch.cr0; 1528 u64 *hcr0 = &svm->vmcb->save.cr0; 1529 1530 if (!svm->vcpu.fpu_active) 1531 *hcr0 |= SVM_CR0_SELECTIVE_MASK; 1532 else 1533 *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) 1534 | (gcr0 & SVM_CR0_SELECTIVE_MASK); 1535 1536 mark_dirty(svm->vmcb, VMCB_CR); 1537 1538 if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { 1539 clr_cr_intercept(svm, INTERCEPT_CR0_READ); 1540 clr_cr_intercept(svm, INTERCEPT_CR0_WRITE); 1541 } else { 1542 set_cr_intercept(svm, INTERCEPT_CR0_READ); 1543 set_cr_intercept(svm, INTERCEPT_CR0_WRITE); 1544 } 1545} 1546 1547static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 1548{ 1549 struct vcpu_svm *svm = to_svm(vcpu); 1550 1551#ifdef CONFIG_X86_64 1552 if (vcpu->arch.efer & EFER_LME) { 1553 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { 1554 vcpu->arch.efer |= EFER_LMA; 1555 svm->vmcb->save.efer |= EFER_LMA | EFER_LME; 1556 } 1557 1558 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { 1559 vcpu->arch.efer &= ~EFER_LMA; 1560 svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); 1561 } 1562 } 1563#endif 1564 vcpu->arch.cr0 = cr0; 1565 1566 if (!npt_enabled) 1567 cr0 |= X86_CR0_PG | X86_CR0_WP; 1568 1569 if (!vcpu->fpu_active) 1570 cr0 |= X86_CR0_TS; 1571 /* 1572 * re-enable caching here because the QEMU bios 1573 * does not do it - this results in some delay at 1574 * reboot 1575 */ 1576 cr0 &= ~(X86_CR0_CD | X86_CR0_NW); 1577 svm->vmcb->save.cr0 = cr0; 1578 mark_dirty(svm->vmcb, VMCB_CR); 1579 update_cr0_intercept(svm); 1580} 1581 1582static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 1583{ 1584 unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE; 1585 unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; 1586 1587 if (cr4 & X86_CR4_VMXE) 1588 return 1; 1589 1590 if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) 1591 svm_flush_tlb(vcpu); 1592 1593 vcpu->arch.cr4 = cr4; 1594 if (!npt_enabled) 1595 cr4 |= X86_CR4_PAE; 1596 cr4 |= host_cr4_mce; 1597 to_svm(vcpu)->vmcb->save.cr4 = cr4; 
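	/*
	 * CR4 belongs to the VMCB_CR clean-bit group; clearing that bit
	 * below forces the CPU to reload the control-register fields from
	 * the VMCB on the next VMRUN.
	 */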
1598 mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); 1599 return 0; 1600} 1601 1602static void svm_set_segment(struct kvm_vcpu *vcpu, 1603 struct kvm_segment *var, int seg) 1604{ 1605 struct vcpu_svm *svm = to_svm(vcpu); 1606 struct vmcb_seg *s = svm_seg(vcpu, seg); 1607 1608 s->base = var->base; 1609 s->limit = var->limit; 1610 s->selector = var->selector; 1611 if (var->unusable) 1612 s->attrib = 0; 1613 else { 1614 s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK); 1615 s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT; 1616 s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT; 1617 s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT; 1618 s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT; 1619 s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT; 1620 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT; 1621 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT; 1622 } 1623 1624 /* 1625 * This is always accurate, except if SYSRET returned to a segment 1626 * with SS.DPL != 3. Intel does not have this quirk, and always 1627 * forces SS.DPL to 3 on sysret, so we ignore that case; fixing it 1628 * would entail passing the CPL to userspace and back. 1629 */ 1630 if (seg == VCPU_SREG_SS) 1631 svm->vmcb->save.cpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3; 1632 1633 mark_dirty(svm->vmcb, VMCB_SEG); 1634} 1635 1636static void update_db_bp_intercept(struct kvm_vcpu *vcpu) 1637{ 1638 struct vcpu_svm *svm = to_svm(vcpu); 1639 1640 clr_exception_intercept(svm, DB_VECTOR); 1641 clr_exception_intercept(svm, BP_VECTOR); 1642 1643 if (svm->nmi_singlestep) 1644 set_exception_intercept(svm, DB_VECTOR); 1645 1646 if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { 1647 if (vcpu->guest_debug & 1648 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) 1649 set_exception_intercept(svm, DB_VECTOR); 1650 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) 1651 set_exception_intercept(svm, BP_VECTOR); 1652 } else 1653 vcpu->guest_debug = 0; 1654} 1655 1656static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) 1657{ 1658 if (sd->next_asid > sd->max_asid) { 1659 ++sd->asid_generation; 1660 sd->next_asid = 1; 1661 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; 1662 } 1663 1664 svm->asid_generation = sd->asid_generation; 1665 svm->vmcb->control.asid = sd->next_asid++; 1666 1667 mark_dirty(svm->vmcb, VMCB_ASID); 1668} 1669 1670static u64 svm_get_dr6(struct kvm_vcpu *vcpu) 1671{ 1672 return to_svm(vcpu)->vmcb->save.dr6; 1673} 1674 1675static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) 1676{ 1677 struct vcpu_svm *svm = to_svm(vcpu); 1678 1679 svm->vmcb->save.dr6 = value; 1680 mark_dirty(svm->vmcb, VMCB_DR); 1681} 1682 1683static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) 1684{ 1685 struct vcpu_svm *svm = to_svm(vcpu); 1686 1687 get_debugreg(vcpu->arch.db[0], 0); 1688 get_debugreg(vcpu->arch.db[1], 1); 1689 get_debugreg(vcpu->arch.db[2], 2); 1690 get_debugreg(vcpu->arch.db[3], 3); 1691 vcpu->arch.dr6 = svm_get_dr6(vcpu); 1692 vcpu->arch.dr7 = svm->vmcb->save.dr7; 1693 1694 vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; 1695 set_dr_intercepts(svm); 1696} 1697 1698static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) 1699{ 1700 struct vcpu_svm *svm = to_svm(vcpu); 1701 1702 svm->vmcb->save.dr7 = value; 1703 mark_dirty(svm->vmcb, VMCB_DR); 1704} 1705 1706static int pf_interception(struct vcpu_svm *svm) 1707{ 1708 u64 fault_address = svm->vmcb->control.exit_info_2; 1709 u32 error_code; 1710 int r = 1; 1711 1712 switch (svm->apf_reason) { 1713 default: 1714 
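		/*
		 * apf_reason is zero for an ordinary #PF/NPF; non-zero
		 * values are async page fault tokens handled in the cases
		 * below.
		 */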
error_code = svm->vmcb->control.exit_info_1; 1715 1716 trace_kvm_page_fault(fault_address, error_code); 1717 if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu)) 1718 kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); 1719 r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code, 1720 svm->vmcb->control.insn_bytes, 1721 svm->vmcb->control.insn_len); 1722 break; 1723 case KVM_PV_REASON_PAGE_NOT_PRESENT: 1724 svm->apf_reason = 0; 1725 local_irq_disable(); 1726 kvm_async_pf_task_wait(fault_address); 1727 local_irq_enable(); 1728 break; 1729 case KVM_PV_REASON_PAGE_READY: 1730 svm->apf_reason = 0; 1731 local_irq_disable(); 1732 kvm_async_pf_task_wake(fault_address); 1733 local_irq_enable(); 1734 break; 1735 } 1736 return r; 1737} 1738 1739static int db_interception(struct vcpu_svm *svm) 1740{ 1741 struct kvm_run *kvm_run = svm->vcpu.run; 1742 1743 if (!(svm->vcpu.guest_debug & 1744 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && 1745 !svm->nmi_singlestep) { 1746 kvm_queue_exception(&svm->vcpu, DB_VECTOR); 1747 return 1; 1748 } 1749 1750 if (svm->nmi_singlestep) { 1751 svm->nmi_singlestep = false; 1752 if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) 1753 svm->vmcb->save.rflags &= 1754 ~(X86_EFLAGS_TF | X86_EFLAGS_RF); 1755 update_db_bp_intercept(&svm->vcpu); 1756 } 1757 1758 if (svm->vcpu.guest_debug & 1759 (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) { 1760 kvm_run->exit_reason = KVM_EXIT_DEBUG; 1761 kvm_run->debug.arch.pc = 1762 svm->vmcb->save.cs.base + svm->vmcb->save.rip; 1763 kvm_run->debug.arch.exception = DB_VECTOR; 1764 return 0; 1765 } 1766 1767 return 1; 1768} 1769 1770static int bp_interception(struct vcpu_svm *svm) 1771{ 1772 struct kvm_run *kvm_run = svm->vcpu.run; 1773 1774 kvm_run->exit_reason = KVM_EXIT_DEBUG; 1775 kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; 1776 kvm_run->debug.arch.exception = BP_VECTOR; 1777 return 0; 1778} 1779 1780static int ud_interception(struct vcpu_svm *svm) 1781{ 1782 int er; 1783 1784 er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); 1785 if (er != EMULATE_DONE) 1786 kvm_queue_exception(&svm->vcpu, UD_VECTOR); 1787 return 1; 1788} 1789 1790static void svm_fpu_activate(struct kvm_vcpu *vcpu) 1791{ 1792 struct vcpu_svm *svm = to_svm(vcpu); 1793 1794 clr_exception_intercept(svm, NM_VECTOR); 1795 1796 svm->vcpu.fpu_active = 1; 1797 update_cr0_intercept(svm); 1798} 1799 1800static int nm_interception(struct vcpu_svm *svm) 1801{ 1802 svm_fpu_activate(&svm->vcpu); 1803 return 1; 1804} 1805 1806static bool is_erratum_383(void) 1807{ 1808 int err, i; 1809 u64 value; 1810 1811 if (!erratum_383_found) 1812 return false; 1813 1814 value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err); 1815 if (err) 1816 return false; 1817 1818 /* Bit 62 may or may not be set for this mce */ 1819 value &= ~(1ULL << 62); 1820 1821 if (value != 0xb600000000010015ULL) 1822 return false; 1823 1824 /* Clear MCi_STATUS registers */ 1825 for (i = 0; i < 6; ++i) 1826 native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0); 1827 1828 value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err); 1829 if (!err) { 1830 u32 low, high; 1831 1832 value &= ~(1ULL << 2); 1833 low = lower_32_bits(value); 1834 high = upper_32_bits(value); 1835 1836 native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high); 1837 } 1838 1839 /* Flush tlb to evict multi-match entries */ 1840 __flush_tlb_all(); 1841 1842 return true; 1843} 1844 1845static void svm_handle_mce(struct vcpu_svm *svm) 1846{ 1847 if (is_erratum_383()) { 1848 /* 1849 * Erratum 383 
triggered. Guest state is corrupt so kill the 1850 * guest. 1851 */ 1852 pr_err("KVM: Guest triggered AMD Erratum 383\n"); 1853 1854 kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu); 1855 1856 return; 1857 } 1858 1859 /* 1860 * On an #MC intercept the MCE handler is not called automatically in 1861 * the host. So do it by hand here. 1862 */ 1863 asm volatile ( 1864 "int $0x12\n"); 1865 /* not sure if we ever come back to this point */ 1866 1867 return; 1868} 1869 1870static int mc_interception(struct vcpu_svm *svm) 1871{ 1872 return 1; 1873} 1874 1875static int shutdown_interception(struct vcpu_svm *svm) 1876{ 1877 struct kvm_run *kvm_run = svm->vcpu.run; 1878 1879 /* 1880 * VMCB is undefined after a SHUTDOWN intercept 1881 * so reinitialize it. 1882 */ 1883 clear_page(svm->vmcb); 1884 init_vmcb(svm); 1885 1886 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; 1887 return 0; 1888} 1889 1890static int io_interception(struct vcpu_svm *svm) 1891{ 1892 struct kvm_vcpu *vcpu = &svm->vcpu; 1893 u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ 1894 int size, in, string; 1895 unsigned port; 1896 1897 ++svm->vcpu.stat.io_exits; 1898 string = (io_info & SVM_IOIO_STR_MASK) != 0; 1899 in = (io_info & SVM_IOIO_TYPE_MASK) != 0; 1900 if (string || in) 1901 return emulate_instruction(vcpu, 0) == EMULATE_DONE; 1902 1903 port = io_info >> 16; 1904 size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; 1905 svm->next_rip = svm->vmcb->control.exit_info_2; 1906 skip_emulated_instruction(&svm->vcpu); 1907 1908 return kvm_fast_pio_out(vcpu, size, port); 1909} 1910 1911static int nmi_interception(struct vcpu_svm *svm) 1912{ 1913 return 1; 1914} 1915 1916static int intr_interception(struct vcpu_svm *svm) 1917{ 1918 ++svm->vcpu.stat.irq_exits; 1919 return 1; 1920} 1921 1922static int nop_on_interception(struct vcpu_svm *svm) 1923{ 1924 return 1; 1925} 1926 1927static int halt_interception(struct vcpu_svm *svm) 1928{ 1929 svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; 1930 skip_emulated_instruction(&svm->vcpu); 1931 return kvm_emulate_halt(&svm->vcpu); 1932} 1933 1934static int vmmcall_interception(struct vcpu_svm *svm) 1935{ 1936 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 1937 skip_emulated_instruction(&svm->vcpu); 1938 kvm_emulate_hypercall(&svm->vcpu); 1939 return 1; 1940} 1941 1942static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) 1943{ 1944 struct vcpu_svm *svm = to_svm(vcpu); 1945 1946 return svm->nested.nested_cr3; 1947} 1948 1949static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) 1950{ 1951 struct vcpu_svm *svm = to_svm(vcpu); 1952 u64 cr3 = svm->nested.nested_cr3; 1953 u64 pdpte; 1954 int ret; 1955 1956 ret = kvm_read_guest_page(vcpu->kvm, gpa_to_gfn(cr3), &pdpte, 1957 offset_in_page(cr3) + index * 8, 8); 1958 if (ret) 1959 return 0; 1960 return pdpte; 1961} 1962 1963static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, 1964 unsigned long root) 1965{ 1966 struct vcpu_svm *svm = to_svm(vcpu); 1967 1968 svm->vmcb->control.nested_cr3 = root; 1969 mark_dirty(svm->vmcb, VMCB_NPT); 1970 svm_flush_tlb(vcpu); 1971} 1972 1973static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, 1974 struct x86_exception *fault) 1975{ 1976 struct vcpu_svm *svm = to_svm(vcpu); 1977 1978 if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) { 1979 /* 1980 * TODO: track the cause of the nested page fault, and 1981 * correctly fill in the high bits of exit_info_1. 
		 */
		svm->vmcb->control.exit_code = SVM_EXIT_NPF;
		svm->vmcb->control.exit_code_hi = 0;
		svm->vmcb->control.exit_info_1 = (1ULL << 32);
		svm->vmcb->control.exit_info_2 = fault->address;
	}

	svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
	svm->vmcb->control.exit_info_1 |= fault->error_code;

	/*
	 * The present bit is always zero for page structure faults on real
	 * hardware.
	 */
	if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
		svm->vmcb->control.exit_info_1 &= ~1;

	nested_svm_vmexit(svm);
}

static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
	kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);

	vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3;
	vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3;
	vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr;
	vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
	vcpu->arch.mmu.shadow_root_level = get_npt_level();
	vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}

static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
}

static int nested_svm_check_permissions(struct vcpu_svm *svm)
{
	if (!(svm->vcpu.arch.efer & EFER_SVME)
	    || !is_paging(&svm->vcpu)) {
		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
		return 1;
	}

	if (svm->vmcb->save.cpl) {
		kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	}

	return 0;
}

static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
				      bool has_error_code, u32 error_code)
{
	int vmexit;

	if (!is_guest_mode(&svm->vcpu))
		return 0;

	svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
	svm->vmcb->control.exit_code_hi = 0;
	svm->vmcb->control.exit_info_1 = error_code;
	svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;

	vmexit = nested_svm_intercept(svm);
	if (vmexit == NESTED_EXIT_DONE)
		svm->nested.exit_required = true;

	return vmexit;
}

/* This function returns true if it is safe to enable the irq window */
static inline bool nested_svm_intr(struct vcpu_svm *svm)
{
	if (!is_guest_mode(&svm->vcpu))
		return true;

	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
		return true;

	if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
		return false;

	/*
	 * If a vmexit was already requested (by an intercepted exception,
	 * for instance) do not overwrite it with an "external interrupt"
	 * vmexit.
	 */
	if (svm->nested.exit_required)
		return false;

	svm->vmcb->control.exit_code = SVM_EXIT_INTR;
	svm->vmcb->control.exit_info_1 = 0;
	svm->vmcb->control.exit_info_2 = 0;

	if (svm->nested.intercept & 1ULL) {
		/*
		 * The #vmexit can't be emulated here directly because this
		 * code path runs with irqs and preemption disabled. A
		 * #vmexit emulation might sleep. Only signal request for
		 * the #vmexit here.
		 */
		svm->nested.exit_required = true;
		trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
		return false;
	}

	return true;
}

/* This function returns true if it is safe to enable the nmi window */
static inline bool nested_svm_nmi(struct vcpu_svm *svm)
{
	if (!is_guest_mode(&svm->vcpu))
		return true;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
		return true;

	svm->vmcb->control.exit_code = SVM_EXIT_NMI;
	svm->nested.exit_required = true;

	return false;
}

static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
{
	struct page *page;

	might_sleep();

	page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
	if (is_error_page(page))
		goto error;

	*_page = page;

	return kmap(page);

error:
	kvm_inject_gp(&svm->vcpu, 0);

	return NULL;
}

static void nested_svm_unmap(struct page *page)
{
	kunmap(page);
	kvm_release_page_dirty(page);
}

static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
{
	unsigned port, size, iopm_len;
	u16 val, mask;
	u8 start_bit;
	u64 gpa;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
		return NESTED_EXIT_HOST;

	port = svm->vmcb->control.exit_info_1 >> 16;
	size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
		SVM_IOIO_SIZE_SHIFT;
	gpa = svm->nested.vmcb_iopm + (port / 8);
	start_bit = port % 8;
	iopm_len = (start_bit + size > 8) ? 2 : 1;
	mask = (0xf >> (4 - size)) << start_bit;
	val = 0;

	if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, iopm_len))
		return NESTED_EXIT_DONE;

	return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}

static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
{
	u32 offset, msr, value;
	int write, mask;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
		return NESTED_EXIT_HOST;

	msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	offset = svm_msrpm_offset(msr);
	write = svm->vmcb->control.exit_info_1 & 1;
	mask = 1 << ((2 * (msr & 0xf)) + write);

	if (offset == MSR_INVALID)
		return NESTED_EXIT_DONE;

	/* Offset is in 32 bit units but we need it in 8 bit units */
	offset *= 4;

	if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4))
		return NESTED_EXIT_DONE;

	return (value & mask) ?
NESTED_EXIT_DONE : NESTED_EXIT_HOST; 2183} 2184 2185static int nested_svm_exit_special(struct vcpu_svm *svm) 2186{ 2187 u32 exit_code = svm->vmcb->control.exit_code; 2188 2189 switch (exit_code) { 2190 case SVM_EXIT_INTR: 2191 case SVM_EXIT_NMI: 2192 case SVM_EXIT_EXCP_BASE + MC_VECTOR: 2193 return NESTED_EXIT_HOST; 2194 case SVM_EXIT_NPF: 2195 /* For now we are always handling NPFs when using them */ 2196 if (npt_enabled) 2197 return NESTED_EXIT_HOST; 2198 break; 2199 case SVM_EXIT_EXCP_BASE + PF_VECTOR: 2200 /* When we're shadowing, trap PFs, but not async PF */ 2201 if (!npt_enabled && svm->apf_reason == 0) 2202 return NESTED_EXIT_HOST; 2203 break; 2204 case SVM_EXIT_EXCP_BASE + NM_VECTOR: 2205 nm_interception(svm); 2206 break; 2207 default: 2208 break; 2209 } 2210 2211 return NESTED_EXIT_CONTINUE; 2212} 2213 2214/* 2215 * If this function returns true, this #vmexit was already handled 2216 */ 2217static int nested_svm_intercept(struct vcpu_svm *svm) 2218{ 2219 u32 exit_code = svm->vmcb->control.exit_code; 2220 int vmexit = NESTED_EXIT_HOST; 2221 2222 switch (exit_code) { 2223 case SVM_EXIT_MSR: 2224 vmexit = nested_svm_exit_handled_msr(svm); 2225 break; 2226 case SVM_EXIT_IOIO: 2227 vmexit = nested_svm_intercept_ioio(svm); 2228 break; 2229 case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: { 2230 u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0); 2231 if (svm->nested.intercept_cr & bit) 2232 vmexit = NESTED_EXIT_DONE; 2233 break; 2234 } 2235 case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: { 2236 u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0); 2237 if (svm->nested.intercept_dr & bit) 2238 vmexit = NESTED_EXIT_DONE; 2239 break; 2240 } 2241 case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { 2242 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); 2243 if (svm->nested.intercept_exceptions & excp_bits) 2244 vmexit = NESTED_EXIT_DONE; 2245 /* async page fault always cause vmexit */ 2246 else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) && 2247 svm->apf_reason != 0) 2248 vmexit = NESTED_EXIT_DONE; 2249 break; 2250 } 2251 case SVM_EXIT_ERR: { 2252 vmexit = NESTED_EXIT_DONE; 2253 break; 2254 } 2255 default: { 2256 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); 2257 if (svm->nested.intercept & exit_bits) 2258 vmexit = NESTED_EXIT_DONE; 2259 } 2260 } 2261 2262 return vmexit; 2263} 2264 2265static int nested_svm_exit_handled(struct vcpu_svm *svm) 2266{ 2267 int vmexit; 2268 2269 vmexit = nested_svm_intercept(svm); 2270 2271 if (vmexit == NESTED_EXIT_DONE) 2272 nested_svm_vmexit(svm); 2273 2274 return vmexit; 2275} 2276 2277static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb) 2278{ 2279 struct vmcb_control_area *dst = &dst_vmcb->control; 2280 struct vmcb_control_area *from = &from_vmcb->control; 2281 2282 dst->intercept_cr = from->intercept_cr; 2283 dst->intercept_dr = from->intercept_dr; 2284 dst->intercept_exceptions = from->intercept_exceptions; 2285 dst->intercept = from->intercept; 2286 dst->iopm_base_pa = from->iopm_base_pa; 2287 dst->msrpm_base_pa = from->msrpm_base_pa; 2288 dst->tsc_offset = from->tsc_offset; 2289 dst->asid = from->asid; 2290 dst->tlb_ctl = from->tlb_ctl; 2291 dst->int_ctl = from->int_ctl; 2292 dst->int_vector = from->int_vector; 2293 dst->int_state = from->int_state; 2294 dst->exit_code = from->exit_code; 2295 dst->exit_code_hi = from->exit_code_hi; 2296 dst->exit_info_1 = from->exit_info_1; 2297 dst->exit_info_2 = from->exit_info_2; 2298 dst->exit_int_info = from->exit_int_info; 2299 dst->exit_int_info_err = 
from->exit_int_info_err; 2300 dst->nested_ctl = from->nested_ctl; 2301 dst->event_inj = from->event_inj; 2302 dst->event_inj_err = from->event_inj_err; 2303 dst->nested_cr3 = from->nested_cr3; 2304 dst->lbr_ctl = from->lbr_ctl; 2305} 2306 2307static int nested_svm_vmexit(struct vcpu_svm *svm) 2308{ 2309 struct vmcb *nested_vmcb; 2310 struct vmcb *hsave = svm->nested.hsave; 2311 struct vmcb *vmcb = svm->vmcb; 2312 struct page *page; 2313 2314 trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, 2315 vmcb->control.exit_info_1, 2316 vmcb->control.exit_info_2, 2317 vmcb->control.exit_int_info, 2318 vmcb->control.exit_int_info_err, 2319 KVM_ISA_SVM); 2320 2321 nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); 2322 if (!nested_vmcb) 2323 return 1; 2324 2325 /* Exit Guest-Mode */ 2326 leave_guest_mode(&svm->vcpu); 2327 svm->nested.vmcb = 0; 2328 2329 /* Give the current vmcb to the guest */ 2330 disable_gif(svm); 2331 2332 nested_vmcb->save.es = vmcb->save.es; 2333 nested_vmcb->save.cs = vmcb->save.cs; 2334 nested_vmcb->save.ss = vmcb->save.ss; 2335 nested_vmcb->save.ds = vmcb->save.ds; 2336 nested_vmcb->save.gdtr = vmcb->save.gdtr; 2337 nested_vmcb->save.idtr = vmcb->save.idtr; 2338 nested_vmcb->save.efer = svm->vcpu.arch.efer; 2339 nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); 2340 nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu); 2341 nested_vmcb->save.cr2 = vmcb->save.cr2; 2342 nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; 2343 nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu); 2344 nested_vmcb->save.rip = vmcb->save.rip; 2345 nested_vmcb->save.rsp = vmcb->save.rsp; 2346 nested_vmcb->save.rax = vmcb->save.rax; 2347 nested_vmcb->save.dr7 = vmcb->save.dr7; 2348 nested_vmcb->save.dr6 = vmcb->save.dr6; 2349 nested_vmcb->save.cpl = vmcb->save.cpl; 2350 2351 nested_vmcb->control.int_ctl = vmcb->control.int_ctl; 2352 nested_vmcb->control.int_vector = vmcb->control.int_vector; 2353 nested_vmcb->control.int_state = vmcb->control.int_state; 2354 nested_vmcb->control.exit_code = vmcb->control.exit_code; 2355 nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi; 2356 nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1; 2357 nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; 2358 nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; 2359 nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; 2360 nested_vmcb->control.next_rip = vmcb->control.next_rip; 2361 2362 /* 2363 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have 2364 * to make sure that we do not lose injected events. So check event_inj 2365 * here and copy it to exit_int_info if it is valid. 2366 * Exit_int_info and event_inj can't be both valid because the case 2367 * below only happens on a VMRUN instruction intercept which has 2368 * no valid exit_int_info set. 
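	 * exit_int_info is what the L1 hypervisor inspects to find an event
	 * that was pending but not yet delivered when the #vmexit happened,
	 * so copying event_inj there lets L1 re-inject it.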
2369 */ 2370 if (vmcb->control.event_inj & SVM_EVTINJ_VALID) { 2371 struct vmcb_control_area *nc = &nested_vmcb->control; 2372 2373 nc->exit_int_info = vmcb->control.event_inj; 2374 nc->exit_int_info_err = vmcb->control.event_inj_err; 2375 } 2376 2377 nested_vmcb->control.tlb_ctl = 0; 2378 nested_vmcb->control.event_inj = 0; 2379 nested_vmcb->control.event_inj_err = 0; 2380 2381 /* We always set V_INTR_MASKING and remember the old value in hflags */ 2382 if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) 2383 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; 2384 2385 /* Restore the original control entries */ 2386 copy_vmcb_control_area(vmcb, hsave); 2387 2388 kvm_clear_exception_queue(&svm->vcpu); 2389 kvm_clear_interrupt_queue(&svm->vcpu); 2390 2391 svm->nested.nested_cr3 = 0; 2392 2393 /* Restore selected save entries */ 2394 svm->vmcb->save.es = hsave->save.es; 2395 svm->vmcb->save.cs = hsave->save.cs; 2396 svm->vmcb->save.ss = hsave->save.ss; 2397 svm->vmcb->save.ds = hsave->save.ds; 2398 svm->vmcb->save.gdtr = hsave->save.gdtr; 2399 svm->vmcb->save.idtr = hsave->save.idtr; 2400 kvm_set_rflags(&svm->vcpu, hsave->save.rflags); 2401 svm_set_efer(&svm->vcpu, hsave->save.efer); 2402 svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE); 2403 svm_set_cr4(&svm->vcpu, hsave->save.cr4); 2404 if (npt_enabled) { 2405 svm->vmcb->save.cr3 = hsave->save.cr3; 2406 svm->vcpu.arch.cr3 = hsave->save.cr3; 2407 } else { 2408 (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3); 2409 } 2410 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax); 2411 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp); 2412 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip); 2413 svm->vmcb->save.dr7 = 0; 2414 svm->vmcb->save.cpl = 0; 2415 svm->vmcb->control.exit_int_info = 0; 2416 2417 mark_all_dirty(svm->vmcb); 2418 2419 nested_svm_unmap(page); 2420 2421 nested_svm_uninit_mmu_context(&svm->vcpu); 2422 kvm_mmu_reset_context(&svm->vcpu); 2423 kvm_mmu_load(&svm->vcpu); 2424 2425 return 0; 2426} 2427 2428static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) 2429{ 2430 /* 2431 * This function merges the msr permission bitmaps of kvm and the 2432 * nested vmcb. 
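	 * A bit set in either bitmap intercepts the access, so the merge is
	 * a simple bitwise OR of the host and nested maps.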
It is optimized in that it only merges the parts where 2433 * the kvm msr permission bitmap may contain zero bits 2434 */ 2435 int i; 2436 2437 if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) 2438 return true; 2439 2440 for (i = 0; i < MSRPM_OFFSETS; i++) { 2441 u32 value, p; 2442 u64 offset; 2443 2444 if (msrpm_offsets[i] == 0xffffffff) 2445 break; 2446 2447 p = msrpm_offsets[i]; 2448 offset = svm->nested.vmcb_msrpm + (p * 4); 2449 2450 if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4)) 2451 return false; 2452 2453 svm->nested.msrpm[p] = svm->msrpm[p] | value; 2454 } 2455 2456 svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); 2457 2458 return true; 2459} 2460 2461static bool nested_vmcb_checks(struct vmcb *vmcb) 2462{ 2463 if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0) 2464 return false; 2465 2466 if (vmcb->control.asid == 0) 2467 return false; 2468 2469 if (vmcb->control.nested_ctl && !npt_enabled) 2470 return false; 2471 2472 return true; 2473} 2474 2475static bool nested_svm_vmrun(struct vcpu_svm *svm) 2476{ 2477 struct vmcb *nested_vmcb; 2478 struct vmcb *hsave = svm->nested.hsave; 2479 struct vmcb *vmcb = svm->vmcb; 2480 struct page *page; 2481 u64 vmcb_gpa; 2482 2483 vmcb_gpa = svm->vmcb->save.rax; 2484 2485 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); 2486 if (!nested_vmcb) 2487 return false; 2488 2489 if (!nested_vmcb_checks(nested_vmcb)) { 2490 nested_vmcb->control.exit_code = SVM_EXIT_ERR; 2491 nested_vmcb->control.exit_code_hi = 0; 2492 nested_vmcb->control.exit_info_1 = 0; 2493 nested_vmcb->control.exit_info_2 = 0; 2494 2495 nested_svm_unmap(page); 2496 2497 return false; 2498 } 2499 2500 trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa, 2501 nested_vmcb->save.rip, 2502 nested_vmcb->control.int_ctl, 2503 nested_vmcb->control.event_inj, 2504 nested_vmcb->control.nested_ctl); 2505 2506 trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff, 2507 nested_vmcb->control.intercept_cr >> 16, 2508 nested_vmcb->control.intercept_exceptions, 2509 nested_vmcb->control.intercept); 2510 2511 /* Clear internal status */ 2512 kvm_clear_exception_queue(&svm->vcpu); 2513 kvm_clear_interrupt_queue(&svm->vcpu); 2514 2515 /* 2516 * Save the old vmcb, so we don't need to pick what we save, but can 2517 * restore everything when a VMEXIT occurs 2518 */ 2519 hsave->save.es = vmcb->save.es; 2520 hsave->save.cs = vmcb->save.cs; 2521 hsave->save.ss = vmcb->save.ss; 2522 hsave->save.ds = vmcb->save.ds; 2523 hsave->save.gdtr = vmcb->save.gdtr; 2524 hsave->save.idtr = vmcb->save.idtr; 2525 hsave->save.efer = svm->vcpu.arch.efer; 2526 hsave->save.cr0 = kvm_read_cr0(&svm->vcpu); 2527 hsave->save.cr4 = svm->vcpu.arch.cr4; 2528 hsave->save.rflags = kvm_get_rflags(&svm->vcpu); 2529 hsave->save.rip = kvm_rip_read(&svm->vcpu); 2530 hsave->save.rsp = vmcb->save.rsp; 2531 hsave->save.rax = vmcb->save.rax; 2532 if (npt_enabled) 2533 hsave->save.cr3 = vmcb->save.cr3; 2534 else 2535 hsave->save.cr3 = kvm_read_cr3(&svm->vcpu); 2536 2537 copy_vmcb_control_area(hsave, vmcb); 2538 2539 if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) 2540 svm->vcpu.arch.hflags |= HF_HIF_MASK; 2541 else 2542 svm->vcpu.arch.hflags &= ~HF_HIF_MASK; 2543 2544 if (nested_vmcb->control.nested_ctl) { 2545 kvm_mmu_unload(&svm->vcpu); 2546 svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3; 2547 nested_svm_init_mmu_context(&svm->vcpu); 2548 } 2549 2550 /* Load the nested guest state */ 2551 svm->vmcb->save.es = nested_vmcb->save.es; 2552 svm->vmcb->save.cs = 
nested_vmcb->save.cs; 2553 svm->vmcb->save.ss = nested_vmcb->save.ss; 2554 svm->vmcb->save.ds = nested_vmcb->save.ds; 2555 svm->vmcb->save.gdtr = nested_vmcb->save.gdtr; 2556 svm->vmcb->save.idtr = nested_vmcb->save.idtr; 2557 kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags); 2558 svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); 2559 svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); 2560 svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); 2561 if (npt_enabled) { 2562 svm->vmcb->save.cr3 = nested_vmcb->save.cr3; 2563 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; 2564 } else 2565 (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); 2566 2567 /* Guest paging mode is active - reset mmu */ 2568 kvm_mmu_reset_context(&svm->vcpu); 2569 2570 svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; 2571 kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); 2572 kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); 2573 kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); 2574 2575 /* In case we don't even reach vcpu_run, the fields are not updated */ 2576 svm->vmcb->save.rax = nested_vmcb->save.rax; 2577 svm->vmcb->save.rsp = nested_vmcb->save.rsp; 2578 svm->vmcb->save.rip = nested_vmcb->save.rip; 2579 svm->vmcb->save.dr7 = nested_vmcb->save.dr7; 2580 svm->vmcb->save.dr6 = nested_vmcb->save.dr6; 2581 svm->vmcb->save.cpl = nested_vmcb->save.cpl; 2582 2583 svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL; 2584 svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; 2585 2586 /* cache intercepts */ 2587 svm->nested.intercept_cr = nested_vmcb->control.intercept_cr; 2588 svm->nested.intercept_dr = nested_vmcb->control.intercept_dr; 2589 svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions; 2590 svm->nested.intercept = nested_vmcb->control.intercept; 2591 2592 svm_flush_tlb(&svm->vcpu); 2593 svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; 2594 if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) 2595 svm->vcpu.arch.hflags |= HF_VINTR_MASK; 2596 else 2597 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; 2598 2599 if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { 2600 /* We only want the cr8 intercept bits of the guest */ 2601 clr_cr_intercept(svm, INTERCEPT_CR8_READ); 2602 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); 2603 } 2604 2605 /* We don't want to see VMMCALLs from a nested guest */ 2606 clr_intercept(svm, INTERCEPT_VMMCALL); 2607 2608 svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl; 2609 svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; 2610 svm->vmcb->control.int_state = nested_vmcb->control.int_state; 2611 svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; 2612 svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; 2613 svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; 2614 2615 nested_svm_unmap(page); 2616 2617 /* Enter Guest-Mode */ 2618 enter_guest_mode(&svm->vcpu); 2619 2620 /* 2621 * Merge guest and host intercepts - must be called with vcpu in 2622 * guest-mode to take affect here 2623 */ 2624 recalc_intercepts(svm); 2625 2626 svm->nested.vmcb = vmcb_gpa; 2627 2628 enable_gif(svm); 2629 2630 mark_all_dirty(svm->vmcb); 2631 2632 return true; 2633} 2634 2635static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) 2636{ 2637 to_vmcb->save.fs = from_vmcb->save.fs; 2638 to_vmcb->save.gs = from_vmcb->save.gs; 2639 to_vmcb->save.tr = from_vmcb->save.tr; 2640 
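	/*
	 * fs/gs/tr/ldtr and the MSRs below are exactly the state that
	 * hardware VMLOAD and VMSAVE transfer.
	 */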
to_vmcb->save.ldtr = from_vmcb->save.ldtr; 2641 to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base; 2642 to_vmcb->save.star = from_vmcb->save.star; 2643 to_vmcb->save.lstar = from_vmcb->save.lstar; 2644 to_vmcb->save.cstar = from_vmcb->save.cstar; 2645 to_vmcb->save.sfmask = from_vmcb->save.sfmask; 2646 to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs; 2647 to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp; 2648 to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; 2649} 2650 2651static int vmload_interception(struct vcpu_svm *svm) 2652{ 2653 struct vmcb *nested_vmcb; 2654 struct page *page; 2655 2656 if (nested_svm_check_permissions(svm)) 2657 return 1; 2658 2659 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); 2660 if (!nested_vmcb) 2661 return 1; 2662 2663 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2664 skip_emulated_instruction(&svm->vcpu); 2665 2666 nested_svm_vmloadsave(nested_vmcb, svm->vmcb); 2667 nested_svm_unmap(page); 2668 2669 return 1; 2670} 2671 2672static int vmsave_interception(struct vcpu_svm *svm) 2673{ 2674 struct vmcb *nested_vmcb; 2675 struct page *page; 2676 2677 if (nested_svm_check_permissions(svm)) 2678 return 1; 2679 2680 nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); 2681 if (!nested_vmcb) 2682 return 1; 2683 2684 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2685 skip_emulated_instruction(&svm->vcpu); 2686 2687 nested_svm_vmloadsave(svm->vmcb, nested_vmcb); 2688 nested_svm_unmap(page); 2689 2690 return 1; 2691} 2692 2693static int vmrun_interception(struct vcpu_svm *svm) 2694{ 2695 if (nested_svm_check_permissions(svm)) 2696 return 1; 2697 2698 /* Save rip after vmrun instruction */ 2699 kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3); 2700 2701 if (!nested_svm_vmrun(svm)) 2702 return 1; 2703 2704 if (!nested_svm_vmrun_msrpm(svm)) 2705 goto failed; 2706 2707 return 1; 2708 2709failed: 2710 2711 svm->vmcb->control.exit_code = SVM_EXIT_ERR; 2712 svm->vmcb->control.exit_code_hi = 0; 2713 svm->vmcb->control.exit_info_1 = 0; 2714 svm->vmcb->control.exit_info_2 = 0; 2715 2716 nested_svm_vmexit(svm); 2717 2718 return 1; 2719} 2720 2721static int stgi_interception(struct vcpu_svm *svm) 2722{ 2723 if (nested_svm_check_permissions(svm)) 2724 return 1; 2725 2726 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2727 skip_emulated_instruction(&svm->vcpu); 2728 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); 2729 2730 enable_gif(svm); 2731 2732 return 1; 2733} 2734 2735static int clgi_interception(struct vcpu_svm *svm) 2736{ 2737 if (nested_svm_check_permissions(svm)) 2738 return 1; 2739 2740 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2741 skip_emulated_instruction(&svm->vcpu); 2742 2743 disable_gif(svm); 2744 2745 /* After a CLGI no interrupts should come */ 2746 svm_clear_vintr(svm); 2747 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; 2748 2749 mark_dirty(svm->vmcb, VMCB_INTR); 2750 2751 return 1; 2752} 2753 2754static int invlpga_interception(struct vcpu_svm *svm) 2755{ 2756 struct kvm_vcpu *vcpu = &svm->vcpu; 2757 2758 trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX], 2759 vcpu->arch.regs[VCPU_REGS_RAX]); 2760 2761 /* Let's treat INVLPGA the same as INVLPG (can be optimized!) 
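	   INVLPGA takes the virtual address in rAX and the ASID in rCX;
	   the ASID is ignored here and only the address is invalidated.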
*/ 2762 kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]); 2763 2764 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2765 skip_emulated_instruction(&svm->vcpu); 2766 return 1; 2767} 2768 2769static int skinit_interception(struct vcpu_svm *svm) 2770{ 2771 trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]); 2772 2773 kvm_queue_exception(&svm->vcpu, UD_VECTOR); 2774 return 1; 2775} 2776 2777static int xsetbv_interception(struct vcpu_svm *svm) 2778{ 2779 u64 new_bv = kvm_read_edx_eax(&svm->vcpu); 2780 u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); 2781 2782 if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) { 2783 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; 2784 skip_emulated_instruction(&svm->vcpu); 2785 } 2786 2787 return 1; 2788} 2789 2790static int task_switch_interception(struct vcpu_svm *svm) 2791{ 2792 u16 tss_selector; 2793 int reason; 2794 int int_type = svm->vmcb->control.exit_int_info & 2795 SVM_EXITINTINFO_TYPE_MASK; 2796 int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK; 2797 uint32_t type = 2798 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; 2799 uint32_t idt_v = 2800 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; 2801 bool has_error_code = false; 2802 u32 error_code = 0; 2803 2804 tss_selector = (u16)svm->vmcb->control.exit_info_1; 2805 2806 if (svm->vmcb->control.exit_info_2 & 2807 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET)) 2808 reason = TASK_SWITCH_IRET; 2809 else if (svm->vmcb->control.exit_info_2 & 2810 (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) 2811 reason = TASK_SWITCH_JMP; 2812 else if (idt_v) 2813 reason = TASK_SWITCH_GATE; 2814 else 2815 reason = TASK_SWITCH_CALL; 2816 2817 if (reason == TASK_SWITCH_GATE) { 2818 switch (type) { 2819 case SVM_EXITINTINFO_TYPE_NMI: 2820 svm->vcpu.arch.nmi_injected = false; 2821 break; 2822 case SVM_EXITINTINFO_TYPE_EXEPT: 2823 if (svm->vmcb->control.exit_info_2 & 2824 (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) { 2825 has_error_code = true; 2826 error_code = 2827 (u32)svm->vmcb->control.exit_info_2; 2828 } 2829 kvm_clear_exception_queue(&svm->vcpu); 2830 break; 2831 case SVM_EXITINTINFO_TYPE_INTR: 2832 kvm_clear_interrupt_queue(&svm->vcpu); 2833 break; 2834 default: 2835 break; 2836 } 2837 } 2838 2839 if (reason != TASK_SWITCH_GATE || 2840 int_type == SVM_EXITINTINFO_TYPE_SOFT || 2841 (int_type == SVM_EXITINTINFO_TYPE_EXEPT && 2842 (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) 2843 skip_emulated_instruction(&svm->vcpu); 2844 2845 if (int_type != SVM_EXITINTINFO_TYPE_SOFT) 2846 int_vec = -1; 2847 2848 if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason, 2849 has_error_code, error_code) == EMULATE_FAIL) { 2850 svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 2851 svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; 2852 svm->vcpu.run->internal.ndata = 0; 2853 return 0; 2854 } 2855 return 1; 2856} 2857 2858static int cpuid_interception(struct vcpu_svm *svm) 2859{ 2860 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; 2861 kvm_emulate_cpuid(&svm->vcpu); 2862 return 1; 2863} 2864 2865static int iret_interception(struct vcpu_svm *svm) 2866{ 2867 ++svm->vcpu.stat.nmi_window_exits; 2868 clr_intercept(svm, INTERCEPT_IRET); 2869 svm->vcpu.arch.hflags |= HF_IRET_MASK; 2870 svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); 2871 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); 2872 return 1; 2873} 2874 2875static int invlpg_interception(struct vcpu_svm *svm) 2876{ 2877 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) 2878 return 
emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; 2879 2880 kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1); 2881 skip_emulated_instruction(&svm->vcpu); 2882 return 1; 2883} 2884 2885static int emulate_on_interception(struct vcpu_svm *svm) 2886{ 2887 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE; 2888} 2889 2890static int rdpmc_interception(struct vcpu_svm *svm) 2891{ 2892 int err; 2893 2894 if (!static_cpu_has(X86_FEATURE_NRIPS)) 2895 return emulate_on_interception(svm); 2896 2897 err = kvm_rdpmc(&svm->vcpu); 2898 kvm_complete_insn_gp(&svm->vcpu, err); 2899 2900 return 1; 2901} 2902 2903bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) 2904{ 2905 unsigned long cr0 = svm->vcpu.arch.cr0; 2906 bool ret = false; 2907 u64 intercept; 2908 2909 intercept = svm->nested.intercept; 2910 2911 if (!is_guest_mode(&svm->vcpu) || 2912 (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))) 2913 return false; 2914 2915 cr0 &= ~SVM_CR0_SELECTIVE_MASK; 2916 val &= ~SVM_CR0_SELECTIVE_MASK; 2917 2918 if (cr0 ^ val) { 2919 svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; 2920 ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE); 2921 } 2922 2923 return ret; 2924} 2925 2926#define CR_VALID (1ULL << 63) 2927 2928static int cr_interception(struct vcpu_svm *svm) 2929{ 2930 int reg, cr; 2931 unsigned long val; 2932 int err; 2933 2934 if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) 2935 return emulate_on_interception(svm); 2936 2937 if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0)) 2938 return emulate_on_interception(svm); 2939 2940 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; 2941 cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0; 2942 2943 err = 0; 2944 if (cr >= 16) { /* mov to cr */ 2945 cr -= 16; 2946 val = kvm_register_read(&svm->vcpu, reg); 2947 switch (cr) { 2948 case 0: 2949 if (!check_selective_cr0_intercepted(svm, val)) 2950 err = kvm_set_cr0(&svm->vcpu, val); 2951 else 2952 return 1; 2953 2954 break; 2955 case 3: 2956 err = kvm_set_cr3(&svm->vcpu, val); 2957 break; 2958 case 4: 2959 err = kvm_set_cr4(&svm->vcpu, val); 2960 break; 2961 case 8: 2962 err = kvm_set_cr8(&svm->vcpu, val); 2963 break; 2964 default: 2965 WARN(1, "unhandled write to CR%d", cr); 2966 kvm_queue_exception(&svm->vcpu, UD_VECTOR); 2967 return 1; 2968 } 2969 } else { /* mov from cr */ 2970 switch (cr) { 2971 case 0: 2972 val = kvm_read_cr0(&svm->vcpu); 2973 break; 2974 case 2: 2975 val = svm->vcpu.arch.cr2; 2976 break; 2977 case 3: 2978 val = kvm_read_cr3(&svm->vcpu); 2979 break; 2980 case 4: 2981 val = kvm_read_cr4(&svm->vcpu); 2982 break; 2983 case 8: 2984 val = kvm_get_cr8(&svm->vcpu); 2985 break; 2986 default: 2987 WARN(1, "unhandled read from CR%d", cr); 2988 kvm_queue_exception(&svm->vcpu, UD_VECTOR); 2989 return 1; 2990 } 2991 kvm_register_write(&svm->vcpu, reg, val); 2992 } 2993 kvm_complete_insn_gp(&svm->vcpu, err); 2994 2995 return 1; 2996} 2997 2998static int dr_interception(struct vcpu_svm *svm) 2999{ 3000 int reg, dr; 3001 unsigned long val; 3002 int err; 3003 3004 if (svm->vcpu.guest_debug == 0) { 3005 /* 3006 * No more DR vmexits; force a reload of the debug registers 3007 * and reenter on this instruction. The next vmexit will 3008 * retrieve the full state of the debug registers. 
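	 * KVM_DEBUGREG_WONT_EXIT tells the common x86 code that the guest
	 * now owns the debug registers, so their hardware contents must be
	 * kept in sync around each vcpu run.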
3009 */ 3010 clr_dr_intercepts(svm); 3011 svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT; 3012 return 1; 3013 } 3014 3015 if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS)) 3016 return emulate_on_interception(svm); 3017 3018 reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; 3019 dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0; 3020 3021 if (dr >= 16) { /* mov to DRn */ 3022 val = kvm_register_read(&svm->vcpu, reg); 3023 kvm_set_dr(&svm->vcpu, dr - 16, val); 3024 } else { 3025 err = kvm_get_dr(&svm->vcpu, dr, &val); 3026 if (!err) 3027 kvm_register_write(&svm->vcpu, reg, val); 3028 } 3029 3030 skip_emulated_instruction(&svm->vcpu); 3031 3032 return 1; 3033} 3034 3035static int cr8_write_interception(struct vcpu_svm *svm) 3036{ 3037 struct kvm_run *kvm_run = svm->vcpu.run; 3038 int r; 3039 3040 u8 cr8_prev = kvm_get_cr8(&svm->vcpu); 3041 /* instruction emulation calls kvm_set_cr8() */ 3042 r = cr_interception(svm); 3043 if (irqchip_in_kernel(svm->vcpu.kvm)) 3044 return r; 3045 if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) 3046 return r; 3047 kvm_run->exit_reason = KVM_EXIT_SET_TPR; 3048 return 0; 3049} 3050 3051static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) 3052{ 3053 struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); 3054 return vmcb->control.tsc_offset + 3055 svm_scale_tsc(vcpu, host_tsc); 3056} 3057 3058static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) 3059{ 3060 struct vcpu_svm *svm = to_svm(vcpu); 3061 3062 switch (ecx) { 3063 case MSR_IA32_TSC: { 3064 *data = svm->vmcb->control.tsc_offset + 3065 svm_scale_tsc(vcpu, native_read_tsc()); 3066 3067 break; 3068 } 3069 case MSR_STAR: 3070 *data = svm->vmcb->save.star; 3071 break; 3072#ifdef CONFIG_X86_64 3073 case MSR_LSTAR: 3074 *data = svm->vmcb->save.lstar; 3075 break; 3076 case MSR_CSTAR: 3077 *data = svm->vmcb->save.cstar; 3078 break; 3079 case MSR_KERNEL_GS_BASE: 3080 *data = svm->vmcb->save.kernel_gs_base; 3081 break; 3082 case MSR_SYSCALL_MASK: 3083 *data = svm->vmcb->save.sfmask; 3084 break; 3085#endif 3086 case MSR_IA32_SYSENTER_CS: 3087 *data = svm->vmcb->save.sysenter_cs; 3088 break; 3089 case MSR_IA32_SYSENTER_EIP: 3090 *data = svm->sysenter_eip; 3091 break; 3092 case MSR_IA32_SYSENTER_ESP: 3093 *data = svm->sysenter_esp; 3094 break; 3095 /* 3096 * Nobody will change the following 5 values in the VMCB so we can 3097 * safely return them on rdmsr. They will always be 0 until LBRV is 3098 * implemented. 
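	 * The five values are dbgctl, br_from, br_to, last_excp_from and
	 * last_excp_to in the VMCB save area, read by the cases below.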
3099 */ 3100 case MSR_IA32_DEBUGCTLMSR: 3101 *data = svm->vmcb->save.dbgctl; 3102 break; 3103 case MSR_IA32_LASTBRANCHFROMIP: 3104 *data = svm->vmcb->save.br_from; 3105 break; 3106 case MSR_IA32_LASTBRANCHTOIP: 3107 *data = svm->vmcb->save.br_to; 3108 break; 3109 case MSR_IA32_LASTINTFROMIP: 3110 *data = svm->vmcb->save.last_excp_from; 3111 break; 3112 case MSR_IA32_LASTINTTOIP: 3113 *data = svm->vmcb->save.last_excp_to; 3114 break; 3115 case MSR_VM_HSAVE_PA: 3116 *data = svm->nested.hsave_msr; 3117 break; 3118 case MSR_VM_CR: 3119 *data = svm->nested.vm_cr_msr; 3120 break; 3121 case MSR_IA32_UCODE_REV: 3122 *data = 0x01000065; 3123 break; 3124 default: 3125 return kvm_get_msr_common(vcpu, ecx, data); 3126 } 3127 return 0; 3128} 3129 3130static int rdmsr_interception(struct vcpu_svm *svm) 3131{ 3132 u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; 3133 u64 data; 3134 3135 if (svm_get_msr(&svm->vcpu, ecx, &data)) { 3136 trace_kvm_msr_read_ex(ecx); 3137 kvm_inject_gp(&svm->vcpu, 0); 3138 } else { 3139 trace_kvm_msr_read(ecx, data); 3140 3141 svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff; 3142 svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32; 3143 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; 3144 skip_emulated_instruction(&svm->vcpu); 3145 } 3146 return 1; 3147} 3148 3149static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) 3150{ 3151 struct vcpu_svm *svm = to_svm(vcpu); 3152 int svm_dis, chg_mask; 3153 3154 if (data & ~SVM_VM_CR_VALID_MASK) 3155 return 1; 3156 3157 chg_mask = SVM_VM_CR_VALID_MASK; 3158 3159 if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK) 3160 chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK); 3161 3162 svm->nested.vm_cr_msr &= ~chg_mask; 3163 svm->nested.vm_cr_msr |= (data & chg_mask); 3164 3165 svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK; 3166 3167 /* check for svm_disable while efer.svme is set */ 3168 if (svm_dis && (vcpu->arch.efer & EFER_SVME)) 3169 return 1; 3170 3171 return 0; 3172} 3173 3174static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) 3175{ 3176 struct vcpu_svm *svm = to_svm(vcpu); 3177 3178 u32 ecx = msr->index; 3179 u64 data = msr->data; 3180 switch (ecx) { 3181 case MSR_IA32_TSC: 3182 kvm_write_tsc(vcpu, msr); 3183 break; 3184 case MSR_STAR: 3185 svm->vmcb->save.star = data; 3186 break; 3187#ifdef CONFIG_X86_64 3188 case MSR_LSTAR: 3189 svm->vmcb->save.lstar = data; 3190 break; 3191 case MSR_CSTAR: 3192 svm->vmcb->save.cstar = data; 3193 break; 3194 case MSR_KERNEL_GS_BASE: 3195 svm->vmcb->save.kernel_gs_base = data; 3196 break; 3197 case MSR_SYSCALL_MASK: 3198 svm->vmcb->save.sfmask = data; 3199 break; 3200#endif 3201 case MSR_IA32_SYSENTER_CS: 3202 svm->vmcb->save.sysenter_cs = data; 3203 break; 3204 case MSR_IA32_SYSENTER_EIP: 3205 svm->sysenter_eip = data; 3206 svm->vmcb->save.sysenter_eip = data; 3207 break; 3208 case MSR_IA32_SYSENTER_ESP: 3209 svm->sysenter_esp = data; 3210 svm->vmcb->save.sysenter_esp = data; 3211 break; 3212 case MSR_IA32_DEBUGCTLMSR: 3213 if (!boot_cpu_has(X86_FEATURE_LBRV)) { 3214 vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", 3215 __func__, data); 3216 break; 3217 } 3218 if (data & DEBUGCTL_RESERVED_BITS) 3219 return 1; 3220 3221 svm->vmcb->save.dbgctl = data; 3222 mark_dirty(svm->vmcb, VMCB_LBR); 3223 if (data & (1ULL<<0)) 3224 svm_enable_lbrv(svm); 3225 else 3226 svm_disable_lbrv(svm); 3227 break; 3228 case MSR_VM_HSAVE_PA: 3229 svm->nested.hsave_msr = data; 3230 break; 3231 case MSR_VM_CR: 3232 return svm_set_vm_cr(vcpu, data); 3233 case MSR_VM_IGNNE: 3234 
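		/* The write is accepted but otherwise ignored. */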
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); 3235 break; 3236 default: 3237 return kvm_set_msr_common(vcpu, msr); 3238 } 3239 return 0; 3240} 3241 3242static int wrmsr_interception(struct vcpu_svm *svm) 3243{ 3244 struct msr_data msr; 3245 u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; 3246 u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) 3247 | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); 3248 3249 msr.data = data; 3250 msr.index = ecx; 3251 msr.host_initiated = false; 3252 3253 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; 3254 if (kvm_set_msr(&svm->vcpu, &msr)) { 3255 trace_kvm_msr_write_ex(ecx, data); 3256 kvm_inject_gp(&svm->vcpu, 0); 3257 } else { 3258 trace_kvm_msr_write(ecx, data); 3259 skip_emulated_instruction(&svm->vcpu); 3260 } 3261 return 1; 3262} 3263 3264static int msr_interception(struct vcpu_svm *svm) 3265{ 3266 if (svm->vmcb->control.exit_info_1) 3267 return wrmsr_interception(svm); 3268 else 3269 return rdmsr_interception(svm); 3270} 3271 3272static int interrupt_window_interception(struct vcpu_svm *svm) 3273{ 3274 struct kvm_run *kvm_run = svm->vcpu.run; 3275 3276 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); 3277 svm_clear_vintr(svm); 3278 svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; 3279 mark_dirty(svm->vmcb, VMCB_INTR); 3280 ++svm->vcpu.stat.irq_window_exits; 3281 /* 3282 * If the user space waits to inject interrupts, exit as soon as 3283 * possible 3284 */ 3285 if (!irqchip_in_kernel(svm->vcpu.kvm) && 3286 kvm_run->request_interrupt_window && 3287 !kvm_cpu_has_interrupt(&svm->vcpu)) { 3288 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; 3289 return 0; 3290 } 3291 3292 return 1; 3293} 3294 3295static int pause_interception(struct vcpu_svm *svm) 3296{ 3297 kvm_vcpu_on_spin(&(svm->vcpu)); 3298 return 1; 3299} 3300 3301static int nop_interception(struct vcpu_svm *svm) 3302{ 3303 skip_emulated_instruction(&(svm->vcpu)); 3304 return 1; 3305} 3306 3307static int monitor_interception(struct vcpu_svm *svm) 3308{ 3309 printk_once(KERN_WARNING "kvm: MONITOR instruction emulated as NOP!\n"); 3310 return nop_interception(svm); 3311} 3312 3313static int mwait_interception(struct vcpu_svm *svm) 3314{ 3315 printk_once(KERN_WARNING "kvm: MWAIT instruction emulated as NOP!\n"); 3316 return nop_interception(svm); 3317} 3318 3319static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { 3320 [SVM_EXIT_READ_CR0] = cr_interception, 3321 [SVM_EXIT_READ_CR3] = cr_interception, 3322 [SVM_EXIT_READ_CR4] = cr_interception, 3323 [SVM_EXIT_READ_CR8] = cr_interception, 3324 [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, 3325 [SVM_EXIT_WRITE_CR0] = cr_interception, 3326 [SVM_EXIT_WRITE_CR3] = cr_interception, 3327 [SVM_EXIT_WRITE_CR4] = cr_interception, 3328 [SVM_EXIT_WRITE_CR8] = cr8_write_interception, 3329 [SVM_EXIT_READ_DR0] = dr_interception, 3330 [SVM_EXIT_READ_DR1] = dr_interception, 3331 [SVM_EXIT_READ_DR2] = dr_interception, 3332 [SVM_EXIT_READ_DR3] = dr_interception, 3333 [SVM_EXIT_READ_DR4] = dr_interception, 3334 [SVM_EXIT_READ_DR5] = dr_interception, 3335 [SVM_EXIT_READ_DR6] = dr_interception, 3336 [SVM_EXIT_READ_DR7] = dr_interception, 3337 [SVM_EXIT_WRITE_DR0] = dr_interception, 3338 [SVM_EXIT_WRITE_DR1] = dr_interception, 3339 [SVM_EXIT_WRITE_DR2] = dr_interception, 3340 [SVM_EXIT_WRITE_DR3] = dr_interception, 3341 [SVM_EXIT_WRITE_DR4] = dr_interception, 3342 [SVM_EXIT_WRITE_DR5] = dr_interception, 3343 [SVM_EXIT_WRITE_DR6] = dr_interception, 3344 [SVM_EXIT_WRITE_DR7] = dr_interception, 3345 [SVM_EXIT_EXCP_BASE + DB_VECTOR] = 
db_interception, 3346 [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, 3347 [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, 3348 [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, 3349 [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, 3350 [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, 3351 [SVM_EXIT_INTR] = intr_interception, 3352 [SVM_EXIT_NMI] = nmi_interception, 3353 [SVM_EXIT_SMI] = nop_on_interception, 3354 [SVM_EXIT_INIT] = nop_on_interception, 3355 [SVM_EXIT_VINTR] = interrupt_window_interception, 3356 [SVM_EXIT_RDPMC] = rdpmc_interception, 3357 [SVM_EXIT_CPUID] = cpuid_interception, 3358 [SVM_EXIT_IRET] = iret_interception, 3359 [SVM_EXIT_INVD] = emulate_on_interception, 3360 [SVM_EXIT_PAUSE] = pause_interception, 3361 [SVM_EXIT_HLT] = halt_interception, 3362 [SVM_EXIT_INVLPG] = invlpg_interception, 3363 [SVM_EXIT_INVLPGA] = invlpga_interception, 3364 [SVM_EXIT_IOIO] = io_interception, 3365 [SVM_EXIT_MSR] = msr_interception, 3366 [SVM_EXIT_TASK_SWITCH] = task_switch_interception, 3367 [SVM_EXIT_SHUTDOWN] = shutdown_interception, 3368 [SVM_EXIT_VMRUN] = vmrun_interception, 3369 [SVM_EXIT_VMMCALL] = vmmcall_interception, 3370 [SVM_EXIT_VMLOAD] = vmload_interception, 3371 [SVM_EXIT_VMSAVE] = vmsave_interception, 3372 [SVM_EXIT_STGI] = stgi_interception, 3373 [SVM_EXIT_CLGI] = clgi_interception, 3374 [SVM_EXIT_SKINIT] = skinit_interception, 3375 [SVM_EXIT_WBINVD] = emulate_on_interception, 3376 [SVM_EXIT_MONITOR] = monitor_interception, 3377 [SVM_EXIT_MWAIT] = mwait_interception, 3378 [SVM_EXIT_XSETBV] = xsetbv_interception, 3379 [SVM_EXIT_NPF] = pf_interception, 3380}; 3381 3382static void dump_vmcb(struct kvm_vcpu *vcpu) 3383{ 3384 struct vcpu_svm *svm = to_svm(vcpu); 3385 struct vmcb_control_area *control = &svm->vmcb->control; 3386 struct vmcb_save_area *save = &svm->vmcb->save; 3387 3388 pr_err("VMCB Control Area:\n"); 3389 pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff); 3390 pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16); 3391 pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff); 3392 pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16); 3393 pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions); 3394 pr_err("%-20s%016llx\n", "intercepts:", control->intercept); 3395 pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count); 3396 pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa); 3397 pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa); 3398 pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset); 3399 pr_err("%-20s%d\n", "asid:", control->asid); 3400 pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl); 3401 pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl); 3402 pr_err("%-20s%08x\n", "int_vector:", control->int_vector); 3403 pr_err("%-20s%08x\n", "int_state:", control->int_state); 3404 pr_err("%-20s%08x\n", "exit_code:", control->exit_code); 3405 pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1); 3406 pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2); 3407 pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info); 3408 pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err); 3409 pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl); 3410 pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3); 3411 pr_err("%-20s%08x\n", "event_inj:", control->event_inj); 3412 pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err); 3413 pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl); 3414 
pr_err("%-20s%016llx\n", "next_rip:", control->next_rip); 3415 pr_err("VMCB State Save Area:\n"); 3416 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3417 "es:", 3418 save->es.selector, save->es.attrib, 3419 save->es.limit, save->es.base); 3420 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3421 "cs:", 3422 save->cs.selector, save->cs.attrib, 3423 save->cs.limit, save->cs.base); 3424 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3425 "ss:", 3426 save->ss.selector, save->ss.attrib, 3427 save->ss.limit, save->ss.base); 3428 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3429 "ds:", 3430 save->ds.selector, save->ds.attrib, 3431 save->ds.limit, save->ds.base); 3432 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3433 "fs:", 3434 save->fs.selector, save->fs.attrib, 3435 save->fs.limit, save->fs.base); 3436 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3437 "gs:", 3438 save->gs.selector, save->gs.attrib, 3439 save->gs.limit, save->gs.base); 3440 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3441 "gdtr:", 3442 save->gdtr.selector, save->gdtr.attrib, 3443 save->gdtr.limit, save->gdtr.base); 3444 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3445 "ldtr:", 3446 save->ldtr.selector, save->ldtr.attrib, 3447 save->ldtr.limit, save->ldtr.base); 3448 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3449 "idtr:", 3450 save->idtr.selector, save->idtr.attrib, 3451 save->idtr.limit, save->idtr.base); 3452 pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n", 3453 "tr:", 3454 save->tr.selector, save->tr.attrib, 3455 save->tr.limit, save->tr.base); 3456 pr_err("cpl: %d efer: %016llx\n", 3457 save->cpl, save->efer); 3458 pr_err("%-15s %016llx %-13s %016llx\n", 3459 "cr0:", save->cr0, "cr2:", save->cr2); 3460 pr_err("%-15s %016llx %-13s %016llx\n", 3461 "cr3:", save->cr3, "cr4:", save->cr4); 3462 pr_err("%-15s %016llx %-13s %016llx\n", 3463 "dr6:", save->dr6, "dr7:", save->dr7); 3464 pr_err("%-15s %016llx %-13s %016llx\n", 3465 "rip:", save->rip, "rflags:", save->rflags); 3466 pr_err("%-15s %016llx %-13s %016llx\n", 3467 "rsp:", save->rsp, "rax:", save->rax); 3468 pr_err("%-15s %016llx %-13s %016llx\n", 3469 "star:", save->star, "lstar:", save->lstar); 3470 pr_err("%-15s %016llx %-13s %016llx\n", 3471 "cstar:", save->cstar, "sfmask:", save->sfmask); 3472 pr_err("%-15s %016llx %-13s %016llx\n", 3473 "kernel_gs_base:", save->kernel_gs_base, 3474 "sysenter_cs:", save->sysenter_cs); 3475 pr_err("%-15s %016llx %-13s %016llx\n", 3476 "sysenter_esp:", save->sysenter_esp, 3477 "sysenter_eip:", save->sysenter_eip); 3478 pr_err("%-15s %016llx %-13s %016llx\n", 3479 "gpat:", save->g_pat, "dbgctl:", save->dbgctl); 3480 pr_err("%-15s %016llx %-13s %016llx\n", 3481 "br_from:", save->br_from, "br_to:", save->br_to); 3482 pr_err("%-15s %016llx %-13s %016llx\n", 3483 "excp_from:", save->last_excp_from, 3484 "excp_to:", save->last_excp_to); 3485} 3486 3487static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) 3488{ 3489 struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; 3490 3491 *info1 = control->exit_info_1; 3492 *info2 = control->exit_info_2; 3493} 3494 3495static int handle_exit(struct kvm_vcpu *vcpu) 3496{ 3497 struct vcpu_svm *svm = to_svm(vcpu); 3498 struct kvm_run *kvm_run = vcpu->run; 3499 u32 exit_code = svm->vmcb->control.exit_code; 3500 3501 if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) 3502 vcpu->arch.cr0 = svm->vmcb->save.cr0; 3503 if (npt_enabled) 3504 vcpu->arch.cr3 = svm->vmcb->save.cr3; 3505 3506 if 
(unlikely(svm->nested.exit_required)) { 3507 nested_svm_vmexit(svm); 3508 svm->nested.exit_required = false; 3509 3510 return 1; 3511 } 3512 3513 if (is_guest_mode(vcpu)) { 3514 int vmexit; 3515 3516 trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code, 3517 svm->vmcb->control.exit_info_1, 3518 svm->vmcb->control.exit_info_2, 3519 svm->vmcb->control.exit_int_info, 3520 svm->vmcb->control.exit_int_info_err, 3521 KVM_ISA_SVM); 3522 3523 vmexit = nested_svm_exit_special(svm); 3524 3525 if (vmexit == NESTED_EXIT_CONTINUE) 3526 vmexit = nested_svm_exit_handled(svm); 3527 3528 if (vmexit == NESTED_EXIT_DONE) 3529 return 1; 3530 } 3531 3532 svm_complete_interrupts(svm); 3533 3534 if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { 3535 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; 3536 kvm_run->fail_entry.hardware_entry_failure_reason 3537 = svm->vmcb->control.exit_code; 3538 pr_err("KVM: FAILED VMRUN WITH VMCB:\n"); 3539 dump_vmcb(vcpu); 3540 return 0; 3541 } 3542 3543 if (is_external_interrupt(svm->vmcb->control.exit_int_info) && 3544 exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && 3545 exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH && 3546 exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI) 3547 printk(KERN_ERR "%s: unexpected exit_int_info 0x%x " 3548 "exit_code 0x%x\n", 3549 __func__, svm->vmcb->control.exit_int_info, 3550 exit_code); 3551 3552 if (exit_code >= ARRAY_SIZE(svm_exit_handlers) 3553 || !svm_exit_handlers[exit_code]) { 3554 WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code); 3555 kvm_queue_exception(vcpu, UD_VECTOR); 3556 return 1; 3557 } 3558 3559 return svm_exit_handlers[exit_code](svm); 3560} 3561 3562static void reload_tss(struct kvm_vcpu *vcpu) 3563{ 3564 int cpu = raw_smp_processor_id(); 3565 3566 struct svm_cpu_data *sd = per_cpu(svm_data, cpu); 3567 sd->tss_desc->type = 9; /* available 32/64-bit TSS */ 3568 load_TR_desc(); 3569} 3570 3571static void pre_svm_run(struct vcpu_svm *svm) 3572{ 3573 int cpu = raw_smp_processor_id(); 3574 3575 struct svm_cpu_data *sd = per_cpu(svm_data, cpu); 3576 3577 /* FIXME: handle wraparound of asid_generation */ 3578 if (svm->asid_generation != sd->asid_generation) 3579 new_asid(svm, sd); 3580} 3581 3582static void svm_inject_nmi(struct kvm_vcpu *vcpu) 3583{ 3584 struct vcpu_svm *svm = to_svm(vcpu); 3585 3586 svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; 3587 vcpu->arch.hflags |= HF_NMI_MASK; 3588 set_intercept(svm, INTERCEPT_IRET); 3589 ++vcpu->stat.nmi_injections; 3590} 3591 3592static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) 3593{ 3594 struct vmcb_control_area *control; 3595 3596 control = &svm->vmcb->control; 3597 control->int_vector = irq; 3598 control->int_ctl &= ~V_INTR_PRIO_MASK; 3599 control->int_ctl |= V_IRQ_MASK | 3600 ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); 3601 mark_dirty(svm->vmcb, VMCB_INTR); 3602} 3603 3604static void svm_set_irq(struct kvm_vcpu *vcpu) 3605{ 3606 struct vcpu_svm *svm = to_svm(vcpu); 3607 3608 BUG_ON(!(gif_set(svm))); 3609 3610 trace_kvm_inj_virq(vcpu->arch.interrupt.nr); 3611 ++vcpu->stat.irq_injections; 3612 3613 svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr | 3614 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; 3615} 3616 3617static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) 3618{ 3619 struct vcpu_svm *svm = to_svm(vcpu); 3620 3621 if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) 3622 return; 3623 3624 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); 3625 3626 if (irr == -1) 3627 
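		/* No interrupt pending: leave CR8 writes unintercepted. */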
return; 3628 3629 if (tpr >= irr) 3630 set_cr_intercept(svm, INTERCEPT_CR8_WRITE); 3631} 3632 3633static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) 3634{ 3635 return; 3636} 3637 3638static int svm_vm_has_apicv(struct kvm *kvm) 3639{ 3640 return 0; 3641} 3642 3643static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) 3644{ 3645 return; 3646} 3647 3648static void svm_hwapic_isr_update(struct kvm *kvm, int isr) 3649{ 3650 return; 3651} 3652 3653static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) 3654{ 3655 return; 3656} 3657 3658static int svm_nmi_allowed(struct kvm_vcpu *vcpu) 3659{ 3660 struct vcpu_svm *svm = to_svm(vcpu); 3661 struct vmcb *vmcb = svm->vmcb; 3662 int ret; 3663 ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && 3664 !(svm->vcpu.arch.hflags & HF_NMI_MASK); 3665 ret = ret && gif_set(svm) && nested_svm_nmi(svm); 3666 3667 return ret; 3668} 3669 3670static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) 3671{ 3672 struct vcpu_svm *svm = to_svm(vcpu); 3673 3674 return !!(svm->vcpu.arch.hflags & HF_NMI_MASK); 3675} 3676 3677static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) 3678{ 3679 struct vcpu_svm *svm = to_svm(vcpu); 3680 3681 if (masked) { 3682 svm->vcpu.arch.hflags |= HF_NMI_MASK; 3683 set_intercept(svm, INTERCEPT_IRET); 3684 } else { 3685 svm->vcpu.arch.hflags &= ~HF_NMI_MASK; 3686 clr_intercept(svm, INTERCEPT_IRET); 3687 } 3688} 3689 3690static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) 3691{ 3692 struct vcpu_svm *svm = to_svm(vcpu); 3693 struct vmcb *vmcb = svm->vmcb; 3694 int ret; 3695 3696 if (!gif_set(svm) || 3697 (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) 3698 return 0; 3699 3700 ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF); 3701 3702 if (is_guest_mode(vcpu)) 3703 return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK); 3704 3705 return ret; 3706} 3707 3708static void enable_irq_window(struct kvm_vcpu *vcpu) 3709{ 3710 struct vcpu_svm *svm = to_svm(vcpu); 3711 3712 /* 3713 * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes 3714 * 1, because that's a separate STGI/VMRUN intercept. The next time we 3715 * get that intercept, this function will be called again though and 3716 * we'll get the vintr intercept. 3717 */ 3718 if (gif_set(svm) && nested_svm_intr(svm)) { 3719 svm_set_vintr(svm); 3720 svm_inject_irq(svm, 0x0); 3721 } 3722} 3723 3724static void enable_nmi_window(struct kvm_vcpu *vcpu) 3725{ 3726 struct vcpu_svm *svm = to_svm(vcpu); 3727 3728 if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) 3729 == HF_NMI_MASK) 3730 return; /* IRET will cause a vm exit */ 3731 3732 /* 3733 * Something prevents NMI from been injected. 
Single step over possible 3734 * problem (IRET or exception injection or interrupt shadow) 3735 */ 3736 svm->nmi_singlestep = true; 3737 svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); 3738 update_db_bp_intercept(vcpu); 3739} 3740 3741static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) 3742{ 3743 return 0; 3744} 3745 3746static void svm_flush_tlb(struct kvm_vcpu *vcpu) 3747{ 3748 struct vcpu_svm *svm = to_svm(vcpu); 3749 3750 if (static_cpu_has(X86_FEATURE_FLUSHBYASID)) 3751 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; 3752 else 3753 svm->asid_generation--; 3754} 3755 3756static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) 3757{ 3758} 3759 3760static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) 3761{ 3762 struct vcpu_svm *svm = to_svm(vcpu); 3763 3764 if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) 3765 return; 3766 3767 if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) { 3768 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; 3769 kvm_set_cr8(vcpu, cr8); 3770 } 3771} 3772 3773static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) 3774{ 3775 struct vcpu_svm *svm = to_svm(vcpu); 3776 u64 cr8; 3777 3778 if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) 3779 return; 3780 3781 cr8 = kvm_get_cr8(vcpu); 3782 svm->vmcb->control.int_ctl &= ~V_TPR_MASK; 3783 svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; 3784} 3785 3786static void svm_complete_interrupts(struct vcpu_svm *svm) 3787{ 3788 u8 vector; 3789 int type; 3790 u32 exitintinfo = svm->vmcb->control.exit_int_info; 3791 unsigned int3_injected = svm->int3_injected; 3792 3793 svm->int3_injected = 0; 3794 3795 /* 3796 * If we've made progress since setting HF_IRET_MASK, we've 3797 * executed an IRET and can allow NMI injection. 3798 */ 3799 if ((svm->vcpu.arch.hflags & HF_IRET_MASK) 3800 && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) { 3801 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); 3802 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); 3803 } 3804 3805 svm->vcpu.arch.nmi_injected = false; 3806 kvm_clear_exception_queue(&svm->vcpu); 3807 kvm_clear_interrupt_queue(&svm->vcpu); 3808 3809 if (!(exitintinfo & SVM_EXITINTINFO_VALID)) 3810 return; 3811 3812 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); 3813 3814 vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK; 3815 type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK; 3816 3817 switch (type) { 3818 case SVM_EXITINTINFO_TYPE_NMI: 3819 svm->vcpu.arch.nmi_injected = true; 3820 break; 3821 case SVM_EXITINTINFO_TYPE_EXEPT: 3822 /* 3823 * In case of software exceptions, do not reinject the vector, 3824 * but re-execute the instruction instead. Rewind RIP first 3825 * if we emulated INT3 before. 
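	 * int3_injected holds the length of the injected INT3 and int3_rip
	 * the linear RIP just past it, so the rewind only happens when RIP
	 * still points right behind the injected breakpoint.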
3826 */ 3827 if (kvm_exception_is_soft(vector)) { 3828 if (vector == BP_VECTOR && int3_injected && 3829 kvm_is_linear_rip(&svm->vcpu, svm->int3_rip)) 3830 kvm_rip_write(&svm->vcpu, 3831 kvm_rip_read(&svm->vcpu) - 3832 int3_injected); 3833 break; 3834 } 3835 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { 3836 u32 err = svm->vmcb->control.exit_int_info_err; 3837 kvm_requeue_exception_e(&svm->vcpu, vector, err); 3838 3839 } else 3840 kvm_requeue_exception(&svm->vcpu, vector); 3841 break; 3842 case SVM_EXITINTINFO_TYPE_INTR: 3843 kvm_queue_interrupt(&svm->vcpu, vector, false); 3844 break; 3845 default: 3846 break; 3847 } 3848} 3849 3850static void svm_cancel_injection(struct kvm_vcpu *vcpu) 3851{ 3852 struct vcpu_svm *svm = to_svm(vcpu); 3853 struct vmcb_control_area *control = &svm->vmcb->control; 3854 3855 control->exit_int_info = control->event_inj; 3856 control->exit_int_info_err = control->event_inj_err; 3857 control->event_inj = 0; 3858 svm_complete_interrupts(svm); 3859} 3860 3861static void svm_vcpu_run(struct kvm_vcpu *vcpu) 3862{ 3863 struct vcpu_svm *svm = to_svm(vcpu); 3864 3865 svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; 3866 svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; 3867 svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; 3868 3869 /* 3870 * A vmexit emulation is required before the vcpu can be executed 3871 * again. 3872 */ 3873 if (unlikely(svm->nested.exit_required)) 3874 return; 3875 3876 pre_svm_run(svm); 3877 3878 sync_lapic_to_cr8(vcpu); 3879 3880 svm->vmcb->save.cr2 = vcpu->arch.cr2; 3881 3882 clgi(); 3883 3884 local_irq_enable(); 3885 3886 asm volatile ( 3887 "push %%" _ASM_BP "; \n\t" 3888 "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t" 3889 "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t" 3890 "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t" 3891 "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t" 3892 "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t" 3893 "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t" 3894#ifdef CONFIG_X86_64 3895 "mov %c[r8](%[svm]), %%r8 \n\t" 3896 "mov %c[r9](%[svm]), %%r9 \n\t" 3897 "mov %c[r10](%[svm]), %%r10 \n\t" 3898 "mov %c[r11](%[svm]), %%r11 \n\t" 3899 "mov %c[r12](%[svm]), %%r12 \n\t" 3900 "mov %c[r13](%[svm]), %%r13 \n\t" 3901 "mov %c[r14](%[svm]), %%r14 \n\t" 3902 "mov %c[r15](%[svm]), %%r15 \n\t" 3903#endif 3904 3905 /* Enter guest mode */ 3906 "push %%" _ASM_AX " \n\t" 3907 "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t" 3908 __ex(SVM_VMLOAD) "\n\t" 3909 __ex(SVM_VMRUN) "\n\t" 3910 __ex(SVM_VMSAVE) "\n\t" 3911 "pop %%" _ASM_AX " \n\t" 3912 3913 /* Save guest registers, load host registers */ 3914 "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t" 3915 "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t" 3916 "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t" 3917 "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t" 3918 "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t" 3919 "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t" 3920#ifdef CONFIG_X86_64 3921 "mov %%r8, %c[r8](%[svm]) \n\t" 3922 "mov %%r9, %c[r9](%[svm]) \n\t" 3923 "mov %%r10, %c[r10](%[svm]) \n\t" 3924 "mov %%r11, %c[r11](%[svm]) \n\t" 3925 "mov %%r12, %c[r12](%[svm]) \n\t" 3926 "mov %%r13, %c[r13](%[svm]) \n\t" 3927 "mov %%r14, %c[r14](%[svm]) \n\t" 3928 "mov %%r15, %c[r15](%[svm]) \n\t" 3929#endif 3930 "pop %%" _ASM_BP 3931 : 3932 : [svm]"a"(svm), 3933 [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)), 3934 [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])), 3935 [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])), 3936 [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])), 3937 [rsi]"i"(offsetof(struct 
vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
		[rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
		[rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
#ifdef CONFIG_X86_64
		, [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
		[r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
		[r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
		[r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
		[r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
		[r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
		[r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
		[r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
#endif
		: "cc", "memory"
#ifdef CONFIG_X86_64
		, "rbx", "rcx", "rdx", "rsi", "rdi"
		, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
#else
		, "ebx", "ecx", "edx", "esi", "edi"
#endif
		);

#ifdef CONFIG_X86_64
	wrmsrl(MSR_GS_BASE, svm->host.gs_base);
#else
	loadsegment(fs, svm->host.fs);
#ifndef CONFIG_X86_32_LAZY_GS
	loadsegment(gs, svm->host.gs);
#endif
#endif

	reload_tss(vcpu);

	local_irq_disable();

	vcpu->arch.cr2 = svm->vmcb->save.cr2;
	vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
	vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
	vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;

	trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM);

	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
		kvm_before_handle_nmi(&svm->vcpu);

	stgi();

	/* Any pending NMI will happen here */

	if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
		kvm_after_handle_nmi(&svm->vcpu);

	sync_cr8_to_lapic(vcpu);

	svm->next_rip = 0;

	svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;

	/* If the exit was due to a #PF, check for an async page fault reason */
	if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
		svm->apf_reason = kvm_read_and_reset_pf_reason();

	if (npt_enabled) {
		vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
		vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
	}

	/*
	 * We need to handle MC intercepts here before the vcpu has a chance to
	 * change the physical cpu
	 */
	if (unlikely(svm->vmcb->control.exit_code ==
		     SVM_EXIT_EXCP_BASE + MC_VECTOR))
		svm_handle_mce(svm);

	mark_all_clean(svm->vmcb);
}

static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	svm->vmcb->save.cr3 = root;
	mark_dirty(svm->vmcb, VMCB_CR);
	svm_flush_tlb(vcpu);
}

static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	svm->vmcb->control.nested_cr3 = root;
	mark_dirty(svm->vmcb, VMCB_NPT);

	/* Also sync guest cr3 here in case we live migrate */
	svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
	mark_dirty(svm->vmcb, VMCB_CR);

	svm_flush_tlb(vcpu);
}

static int is_disabled(void)
{
	u64 vm_cr;

	rdmsrl(MSR_VM_CR, vm_cr);
	if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
		return 1;

	return 0;
}

static void
svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
{
	/*
	 * Patch in the VMMCALL instruction:
	 */
	hypercall[0] = 0x0f;
	hypercall[1] = 0x01;
	hypercall[2] = 0xd9;
}

static void svm_check_processor_compat(void *rtn)
{
	*(int *)rtn = 0;
}

static bool svm_cpu_has_accelerated_tpr(void)
{
	return false;
}

static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
{
	return 0;
}

static void svm_cpuid_update(struct kvm_vcpu *vcpu)
{
}

static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
{
	switch (func) {
	case 0x80000001:
		if (nested)
			entry->ecx |= (1 << 2); /* Set SVM bit */
		break;
	case 0x8000000A:
		entry->eax = 1; /* SVM revision 1 */
		entry->ebx = 8; /* Let's support 8 ASIDs in case we add proper
				   ASID emulation to nested SVM */
		entry->ecx = 0; /* Reserved */
		entry->edx = 0; /* By default do not support any
				   additional features */

		/* Support next_rip if host supports it */
		if (boot_cpu_has(X86_FEATURE_NRIPS))
			entry->edx |= SVM_FEATURE_NRIP;

		/* Support NPT for the guest if enabled */
		if (npt_enabled)
			entry->edx |= SVM_FEATURE_NPT;

		break;
	}
}

static int svm_get_lpage_level(void)
{
	return PT_PDPE_LEVEL;
}

static bool svm_rdtscp_supported(void)
{
	return false;
}

static bool svm_invpcid_supported(void)
{
	return false;
}

static bool svm_mpx_supported(void)
{
	return false;
}

static bool svm_has_wbinvd_exit(void)
{
	return true;
}

static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	set_exception_intercept(svm, NM_VECTOR);
	update_cr0_intercept(svm);
}

#define PRE_EX(exit)  { .exit_code = (exit), \
			.stage = X86_ICPT_PRE_EXCEPT, }
#define POST_EX(exit) { .exit_code = (exit), \
			.stage = X86_ICPT_POST_EXCEPT, }
#define POST_MEM(exit) { .exit_code = (exit), \
			.stage = X86_ICPT_POST_MEMACCESS, }

static const struct __x86_intercept {
	u32 exit_code;
	enum x86_intercept_stage stage;
} x86_intercept_map[] = {
	[x86_intercept_cr_read] = POST_EX(SVM_EXIT_READ_CR0),
	[x86_intercept_cr_write] = POST_EX(SVM_EXIT_WRITE_CR0),
	[x86_intercept_clts] = POST_EX(SVM_EXIT_WRITE_CR0),
	[x86_intercept_lmsw] = POST_EX(SVM_EXIT_WRITE_CR0),
	[x86_intercept_smsw] = POST_EX(SVM_EXIT_READ_CR0),
	[x86_intercept_dr_read] = POST_EX(SVM_EXIT_READ_DR0),
	[x86_intercept_dr_write] = POST_EX(SVM_EXIT_WRITE_DR0),
	[x86_intercept_sldt] = POST_EX(SVM_EXIT_LDTR_READ),
	[x86_intercept_str] = POST_EX(SVM_EXIT_TR_READ),
	[x86_intercept_lldt] = POST_EX(SVM_EXIT_LDTR_WRITE),
	[x86_intercept_ltr] = POST_EX(SVM_EXIT_TR_WRITE),
	[x86_intercept_sgdt] = POST_EX(SVM_EXIT_GDTR_READ),
	[x86_intercept_sidt] = POST_EX(SVM_EXIT_IDTR_READ),
	[x86_intercept_lgdt] = POST_EX(SVM_EXIT_GDTR_WRITE),
	[x86_intercept_lidt] = POST_EX(SVM_EXIT_IDTR_WRITE),
	[x86_intercept_vmrun] = POST_EX(SVM_EXIT_VMRUN),
	[x86_intercept_vmmcall] = POST_EX(SVM_EXIT_VMMCALL),
	[x86_intercept_vmload] = POST_EX(SVM_EXIT_VMLOAD),
	[x86_intercept_vmsave] = POST_EX(SVM_EXIT_VMSAVE),
	[x86_intercept_stgi] = POST_EX(SVM_EXIT_STGI),
	[x86_intercept_clgi] = POST_EX(SVM_EXIT_CLGI),
	[x86_intercept_skinit] = POST_EX(SVM_EXIT_SKINIT),
	[x86_intercept_invlpga] = POST_EX(SVM_EXIT_INVLPGA),
	[x86_intercept_rdtscp] = POST_EX(SVM_EXIT_RDTSCP),
	[x86_intercept_monitor] = POST_MEM(SVM_EXIT_MONITOR),
	[x86_intercept_mwait] = POST_EX(SVM_EXIT_MWAIT),
	[x86_intercept_invlpg] = POST_EX(SVM_EXIT_INVLPG),
	[x86_intercept_invd] = POST_EX(SVM_EXIT_INVD),
	[x86_intercept_wbinvd] = POST_EX(SVM_EXIT_WBINVD),
	[x86_intercept_wrmsr] = POST_EX(SVM_EXIT_MSR),
	[x86_intercept_rdtsc] = POST_EX(SVM_EXIT_RDTSC),
	[x86_intercept_rdmsr] = POST_EX(SVM_EXIT_MSR),
	[x86_intercept_rdpmc] = POST_EX(SVM_EXIT_RDPMC),
	[x86_intercept_cpuid] = PRE_EX(SVM_EXIT_CPUID),
	[x86_intercept_rsm] = PRE_EX(SVM_EXIT_RSM),
	[x86_intercept_pause] = PRE_EX(SVM_EXIT_PAUSE),
	[x86_intercept_pushf] = PRE_EX(SVM_EXIT_PUSHF),
	[x86_intercept_popf] = PRE_EX(SVM_EXIT_POPF),
	[x86_intercept_intn] = PRE_EX(SVM_EXIT_SWINT),
	[x86_intercept_iret] = PRE_EX(SVM_EXIT_IRET),
	[x86_intercept_icebp] = PRE_EX(SVM_EXIT_ICEBP),
	[x86_intercept_hlt] = POST_EX(SVM_EXIT_HLT),
	[x86_intercept_in] = POST_EX(SVM_EXIT_IOIO),
	[x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO),
	[x86_intercept_out] = POST_EX(SVM_EXIT_IOIO),
	[x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO),
};

#undef PRE_EX
#undef POST_EX
#undef POST_MEM

static int svm_check_intercept(struct kvm_vcpu *vcpu,
			       struct x86_instruction_info *info,
			       enum x86_intercept_stage stage)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int vmexit, ret = X86EMUL_CONTINUE;
	struct __x86_intercept icpt_info;
	struct vmcb *vmcb = svm->vmcb;

	if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
		goto out;

	icpt_info = x86_intercept_map[info->intercept];

	if (stage != icpt_info.stage)
		goto out;

	switch (icpt_info.exit_code) {
	case SVM_EXIT_READ_CR0:
		if (info->intercept == x86_intercept_cr_read)
			icpt_info.exit_code += info->modrm_reg;
		break;
	case SVM_EXIT_WRITE_CR0: {
		unsigned long cr0, val;
		u64 intercept;

		if (info->intercept == x86_intercept_cr_write)
			icpt_info.exit_code += info->modrm_reg;

		if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
		    info->intercept == x86_intercept_clts)
			break;

		intercept = svm->nested.intercept;

		if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
			break;

		cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
		val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;

		if (info->intercept == x86_intercept_lmsw) {
			cr0 &= 0xfUL;
			val &= 0xfUL;
			/* lmsw can't clear PE - catch this here */
			if (cr0 & X86_CR0_PE)
				val |= X86_CR0_PE;
		}

		if (cr0 ^ val)
			icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;

		break;
	}
	case SVM_EXIT_READ_DR0:
	case SVM_EXIT_WRITE_DR0:
		icpt_info.exit_code += info->modrm_reg;
		break;
	case SVM_EXIT_MSR:
		if (info->intercept == x86_intercept_wrmsr)
			vmcb->control.exit_info_1 = 1;
		else
			vmcb->control.exit_info_1 = 0;
		break;
	case SVM_EXIT_PAUSE:
		/*
		 * We only get this for NOP, but PAUSE is really
		 * REP NOP, so check for the REP prefix here.
		 */
		if (info->rep_prefix != REPE_PREFIX)
			goto out;
		break;
	case SVM_EXIT_IOIO: {
		u64 exit_info;
		u32 bytes;

		if (info->intercept == x86_intercept_in ||
		    info->intercept == x86_intercept_ins) {
			/*
			 * IN/INS: the port number is the source operand,
			 * the data width comes from the destination.
			 */
			exit_info = ((info->src_val & 0xffff) << 16) |
				SVM_IOIO_TYPE_MASK;
			bytes = info->dst_bytes;
		} else {
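			/*
			 * OUT/OUTS: the port number is the destination
			 * operand, the data width comes from the source.
			 */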
			exit_info = (info->dst_val & 0xffff) << 16;
			bytes = info->src_bytes;
		}

		if (info->intercept == x86_intercept_outs ||
		    info->intercept == x86_intercept_ins)
			exit_info |= SVM_IOIO_STR_MASK;

		if (info->rep_prefix)
			exit_info |= SVM_IOIO_REP_MASK;

		bytes = min(bytes, 4u);

		exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;

		exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);

		vmcb->control.exit_info_1 = exit_info;
		vmcb->control.exit_info_2 = info->next_rip;

		break;
	}
	default:
		break;
	}

	vmcb->control.next_rip = info->next_rip;
	vmcb->control.exit_code = icpt_info.exit_code;
	vmexit = nested_svm_exit_handled(svm);

	ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
					   : X86EMUL_CONTINUE;

out:
	return ret;
}

static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
{
	local_irq_enable();
}

static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
}

static struct kvm_x86_ops svm_x86_ops = {
	.cpu_has_kvm_support = has_svm,
	.disabled_by_bios = is_disabled,
	.hardware_setup = svm_hardware_setup,
	.hardware_unsetup = svm_hardware_unsetup,
	.check_processor_compatibility = svm_check_processor_compat,
	.hardware_enable = svm_hardware_enable,
	.hardware_disable = svm_hardware_disable,
	.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,

	.vcpu_create = svm_create_vcpu,
	.vcpu_free = svm_free_vcpu,
	.vcpu_reset = svm_vcpu_reset,

	.prepare_guest_switch = svm_prepare_guest_switch,
	.vcpu_load = svm_vcpu_load,
	.vcpu_put = svm_vcpu_put,

	.update_db_bp_intercept = update_db_bp_intercept,
	.get_msr = svm_get_msr,
	.set_msr = svm_set_msr,
	.get_segment_base = svm_get_segment_base,
	.get_segment = svm_get_segment,
	.set_segment = svm_set_segment,
	.get_cpl = svm_get_cpl,
	.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
	.decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
	.decache_cr3 = svm_decache_cr3,
	.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
	.set_cr0 = svm_set_cr0,
	.set_cr3 = svm_set_cr3,
	.set_cr4 = svm_set_cr4,
	.set_efer = svm_set_efer,
	.get_idt = svm_get_idt,
	.set_idt = svm_set_idt,
	.get_gdt = svm_get_gdt,
	.set_gdt = svm_set_gdt,
	.get_dr6 = svm_get_dr6,
	.set_dr6 = svm_set_dr6,
	.set_dr7 = svm_set_dr7,
	.sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
	.cache_reg = svm_cache_reg,
	.get_rflags = svm_get_rflags,
	.set_rflags = svm_set_rflags,
	.fpu_deactivate = svm_fpu_deactivate,

	.tlb_flush = svm_flush_tlb,

	.run = svm_vcpu_run,
	.handle_exit = handle_exit,
	.skip_emulated_instruction = skip_emulated_instruction,
	.set_interrupt_shadow = svm_set_interrupt_shadow,
	.get_interrupt_shadow = svm_get_interrupt_shadow,
	.patch_hypercall = svm_patch_hypercall,
	.set_irq = svm_set_irq,
	.set_nmi = svm_inject_nmi,
	.queue_exception = svm_queue_exception,
	.cancel_injection = svm_cancel_injection,
	.interrupt_allowed = svm_interrupt_allowed,
	.nmi_allowed = svm_nmi_allowed,
	.get_nmi_mask = svm_get_nmi_mask,
	.set_nmi_mask = svm_set_nmi_mask,
	.enable_nmi_window = enable_nmi_window,
	.enable_irq_window = enable_irq_window,
	.update_cr8_intercept = update_cr8_intercept,
	.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
	.vm_has_apicv = svm_vm_has_apicv,
	.load_eoi_exitmap = svm_load_eoi_exitmap,
	.hwapic_isr_update = svm_hwapic_isr_update,
	.sync_pir_to_irr = svm_sync_pir_to_irr,

	.set_tss_addr = svm_set_tss_addr,
	.get_tdp_level = get_npt_level,
	.get_mt_mask = svm_get_mt_mask,

	.get_exit_info = svm_get_exit_info,

	.get_lpage_level = svm_get_lpage_level,

	.cpuid_update = svm_cpuid_update,

	.rdtscp_supported = svm_rdtscp_supported,
	.invpcid_supported = svm_invpcid_supported,
	.mpx_supported = svm_mpx_supported,

	.set_supported_cpuid = svm_set_supported_cpuid,

	.has_wbinvd_exit = svm_has_wbinvd_exit,

	.set_tsc_khz = svm_set_tsc_khz,
	.read_tsc_offset = svm_read_tsc_offset,
	.write_tsc_offset = svm_write_tsc_offset,
	.adjust_tsc_offset = svm_adjust_tsc_offset,
	.compute_tsc_offset = svm_compute_tsc_offset,
	.read_l1_tsc = svm_read_l1_tsc,

	.set_tdp_cr3 = set_tdp_cr3,

	.check_intercept = svm_check_intercept,
	.handle_external_intr = svm_handle_external_intr,

	.sched_in = svm_sched_in,
};

static int __init svm_init(void)
{
	return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
			__alignof__(struct vcpu_svm), THIS_MODULE);
}

static void __exit svm_exit(void)
{
	kvm_exit();
}

module_init(svm_init)
module_exit(svm_exit)