diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2ae0aa4461e828d18ff425a0a632cd3def28555f..1f1da43ff2a2ca66a137c434cf738dbf7a03e704 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -712,10 +712,10 @@ static void nested_release_page_clean(struct page *page)
        kvm_release_page_clean(page);
 }
 
+static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
 static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
-static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
                            struct kvm_segment *var, int seg);
@@ -1040,8 +1040,7 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
                (vmcs12->secondary_vm_exec_control & bit);
 }
 
-static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12,
-       struct kvm_vcpu *vcpu)
+static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
 {
        return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
 }
@@ -2161,6 +2160,7 @@ static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high;
 static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high;
 static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high;
 static u32 nested_vmx_misc_low, nested_vmx_misc_high;
+static u32 nested_vmx_ept_caps;
 static __init void nested_vmx_setup_ctls_msrs(void)
 {
        /*
@@ -2196,13 +2196,15 @@ static __init void nested_vmx_setup_ctls_msrs(void)
         * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and
         * 17 must be 1.
         */
+       rdmsr(MSR_IA32_VMX_EXIT_CTLS,
+               nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high);
        nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
        /* Note that guest use of VM_EXIT_ACK_INTR_ON_EXIT is not supported. */
+       nested_vmx_exit_ctls_high &=
 #ifdef CONFIG_X86_64
-       nested_vmx_exit_ctls_high = VM_EXIT_HOST_ADDR_SPACE_SIZE;
-#else
-       nested_vmx_exit_ctls_high = 0;
+               VM_EXIT_HOST_ADDR_SPACE_SIZE |
 #endif
+               VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
        nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
                                      VM_EXIT_LOAD_IA32_EFER);
 
@@ -2212,9 +2214,13 @@ static __init void nested_vmx_setup_ctls_msrs(void)
        /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */
        nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
        nested_vmx_entry_ctls_high &=
-               VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
+#ifdef CONFIG_X86_64
+               VM_ENTRY_IA32E_MODE |
+#endif
+               VM_ENTRY_LOAD_IA32_PAT;
        nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
                                       VM_ENTRY_LOAD_IA32_EFER);
+
        /* cpu-based controls */
        rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
                nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
@@ -2248,6 +2254,22 @@ static __init void nested_vmx_setup_ctls_msrs(void)
                SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
                SECONDARY_EXEC_WBINVD_EXITING;
 
+       if (enable_ept) {
+               /* nested EPT: emulate EPT for L1 as well */
+               nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT;
+               nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
+                        VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
+               nested_vmx_ept_caps &= vmx_capability.ept;
+               /*
+                * Since invept is completely emulated, we support both global
+                * and context invalidation independently of what the host
+                * cpu supports.
+                */
+               nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
+                       VMX_EPT_EXTENT_CONTEXT_BIT;
+       } else
+               nested_vmx_ept_caps = 0;
+
        /* miscellaneous data */
        rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
        nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
@@ -2356,8 +2378,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
                                        nested_vmx_secondary_ctls_high);
                break;
        case MSR_IA32_VMX_EPT_VPID_CAP:
-               /* Currently, no nested ept or nested vpid */
-               *pdata = 0;
+               /* Currently, no nested vpid support */
+               *pdata = nested_vmx_ept_caps;
                break;
        default:
                return 0;
@@ -5463,6 +5485,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
                err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
 
                if (err == EMULATE_USER_EXIT) {
+                       ++vcpu->stat.mmio_exits;
                        ret = 0;
                        goto out;
                }
@@ -6279,6 +6302,74 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
        return 1;
 }
 
+/* Emulate the INVEPT instruction */
+static int handle_invept(struct kvm_vcpu *vcpu)
+{
+       u32 vmx_instruction_info, types;
+       unsigned long type;
+       gva_t gva;
+       struct x86_exception e;
+       struct {
+               u64 eptp, gpa;
+       } operand;
+       u64 eptp_mask = ((1ull << 51) - 1) & PAGE_MASK;
+
+       if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) ||
+           !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 1;
+       }
+
+       if (!nested_vmx_check_permission(vcpu))
+               return 1;
+
+       if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 1;
+       }
+
+       vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+       type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
+
+       types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
+
+       if (!(types & (1UL << type))) {
+               nested_vmx_failValid(vcpu,
+                               VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+               return 1;
+       }
+
+       /* According to the Intel VMX instruction reference, the memory
+        * operand is read even if it isn't needed (e.g., for type==global)
+        */
+       if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+                       vmx_instruction_info, &gva))
+               return 1;
+       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
+                               sizeof(operand), &e)) {
+               kvm_inject_page_fault(vcpu, &e);
+               return 1;
+       }
+
+       switch (type) {
+       case VMX_EPT_EXTENT_CONTEXT:
+               if ((operand.eptp & eptp_mask) !=
+                               (nested_ept_get_cr3(vcpu) & eptp_mask))
+                       break;
+       case VMX_EPT_EXTENT_GLOBAL:
+               kvm_mmu_sync_roots(vcpu);
+               kvm_mmu_flush_tlb(vcpu);
+               nested_vmx_succeed(vcpu);
+               break;
+       default:
+               BUG_ON(1);
+               break;
+       }
+
+       skip_emulated_instruction(vcpu);
+       return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -6323,6 +6414,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
        [EXIT_REASON_MWAIT_INSTRUCTION]       = handle_invalid_op,
        [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_invalid_op,
+       [EXIT_REASON_INVEPT]                  = handle_invept,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -6549,6 +6641,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
        case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
        case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
        case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
+       case EXIT_REASON_INVEPT:
                /*
                 * VMX instructions trap unconditionally. This allows L1 to
                 * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -6581,7 +6674,20 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                return nested_cpu_has2(vmcs12,
                        SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
        case EXIT_REASON_EPT_VIOLATION:
+               /*
+                * L0 always deals with the EPT violation. If nested EPT is
+                * used, and the nested mmu code discovers that the address is
+                * missing in the guest EPT table (EPT12), the EPT violation
+                * will be injected with nested_ept_inject_page_fault()
+                */
+               return 0;
        case EXIT_REASON_EPT_MISCONFIG:
+               /*
+                * L2 never directly uses L1's EPT, but rather L0's own EPT
+                * table (shadow on EPT) or a merged EPT table that L0 built
+                * (EPT on EPT). So any problems with the structure of the
+                * table are L0's fault.
+                */
                return 0;
        case EXIT_REASON_PREEMPTION_TIMER:
                return vmcs12->pin_based_vm_exec_control &
@@ -6669,7 +6775,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 
        if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
            !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
-                                       get_vmcs12(vcpu), vcpu)))) {
+                                       get_vmcs12(vcpu))))) {
                if (vmx_interrupt_allowed(vcpu)) {
                        vmx->soft_vnmi_blocked = 0;
                } else if (vmx->vnmi_blocked_time > 1000000000LL &&
@@ -7595,9 +7701,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                        ~VM_ENTRY_IA32E_MODE) |
                (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
 
-       if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)
+       if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) {
                vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
-       else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
+               vcpu->arch.pat = vmcs12->guest_ia32_pat;
+       } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
                vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
 
 
@@ -8118,8 +8225,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
        vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
        vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
 
-       if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT)
+       if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
                vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
+               vcpu->arch.pat = vmcs12->host_ia32_pat;
+       }
        if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
                vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
                        vmcs12->host_ia32_perf_global_ctrl);