kvm: optimize out smp_mb after srcu_read_unlock
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e5ca72a5cdb6da13617033ad8c0c65c4391d9e2f..21ef1ba184ae8500a70061f566ea55fde76cbfd2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -577,6 +577,7 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
 int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 {
        u64 xcr0;
+       u64 valid_bits;
 
        /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now  */
        if (index != XCR_XFEATURE_ENABLED_MASK)
@@ -586,8 +587,16 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
                return 1;
        if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
                return 1;
-       if (xcr0 & ~host_xcr0)
+
+       /*
+        * Do not allow the guest to set bits that we do not support
+        * saving.  However, xcr0 bit 0 is always set, even if the
+        * emulated CPU does not support XSAVE (see fx_init).
+        */
+       valid_bits = vcpu->arch.guest_supported_xcr0 | XSTATE_FP;
+       if (xcr0 & ~valid_bits)
                return 1;
+
        kvm_put_guest_xcr0(vcpu);
        vcpu->arch.xcr0 = xcr0;
        return 0;
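
The valid_bits test above is a plain mask check: any xcr0 bit outside the guest-visible feature set rejects the write, while bit 0 (x87 state) must always remain set. A minimal standalone sketch of the same logic, assuming the XSTATE_* bit values of this kernel era (0x1/0x2/0x4) and a hypothetical guest_supported_xcr0:

    #include <stdint.h>
    #include <stdio.h>

    #define XSTATE_FP  0x1ULL              /* x87; must stay set */
    #define XSTATE_SSE 0x2ULL
    #define XSTATE_YMM 0x4ULL

    /* Illustrative stand-in for vcpu->arch.guest_supported_xcr0. */
    static const uint64_t guest_supported_xcr0 = XSTATE_FP | XSTATE_SSE;

    static int set_xcr0(uint64_t xcr0)
    {
            uint64_t valid_bits = guest_supported_xcr0 | XSTATE_FP;

            if (!(xcr0 & XSTATE_FP))                        /* bit 0 cleared */
                    return 1;
            if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
                    return 1;
            if (xcr0 & ~valid_bits)                         /* unsupported bit */
                    return 1;
            return 0;
    }

    int main(void)
    {
            printf("%d\n", set_xcr0(XSTATE_FP | XSTATE_SSE));              /* 0 */
            printf("%d\n", set_xcr0(XSTATE_FP | XSTATE_SSE | XSTATE_YMM)); /* 1 */
            return 0;
    }
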
@@ -684,7 +693,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 
        vcpu->arch.cr3 = cr3;
        __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
-       vcpu->arch.mmu.new_cr3(vcpu);
+       kvm_mmu_new_cr3(vcpu);
        return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr3);
@@ -2564,6 +2573,7 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
        case KVM_CAP_SET_TSS_ADDR:
        case KVM_CAP_EXT_CPUID:
+       case KVM_CAP_EXT_EMUL_CPUID:
        case KVM_CAP_CLOCKSOURCE:
        case KVM_CAP_PIT:
        case KVM_CAP_NOP_IO_DELAY:
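
The new KVM_CAP_EXT_EMUL_CPUID entry lets userspace probe for the ioctl added below before using it. A minimal sketch of the probe against /dev/kvm (error handling omitted for brevity):

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
            int kvm = open("/dev/kvm", O_RDWR);

            /* KVM_CHECK_EXTENSION returns a positive value when the
             * capability (and thus KVM_GET_EMULATED_CPUID) is present. */
            return ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_EXT_EMUL_CPUID) > 0 ? 0 : 1;
    }
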
@@ -2673,15 +2683,17 @@ long kvm_arch_dev_ioctl(struct file *filp,
                r = 0;
                break;
        }
-       case KVM_GET_SUPPORTED_CPUID: {
+       case KVM_GET_SUPPORTED_CPUID:
+       case KVM_GET_EMULATED_CPUID: {
                struct kvm_cpuid2 __user *cpuid_arg = argp;
                struct kvm_cpuid2 cpuid;
 
                r = -EFAULT;
                if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
                        goto out;
-               r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
-                                                     cpuid_arg->entries);
+
+               r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
+                                           ioctl);
                if (r)
                        goto out;
 
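
Both ioctls take the same struct kvm_cpuid2 argument, so userspace drives them identically; only the request number differs. A minimal sketch of a KVM_GET_EMULATED_CPUID call (error handling trimmed; the initial nent is a guess that real code would retry with a larger buffer on E2BIG):

    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
            int kvm = open("/dev/kvm", O_RDWR);
            unsigned nent = 128;    /* guess; grow and retry on E2BIG */
            struct kvm_cpuid2 *cpuid;

            cpuid = calloc(1, sizeof(*cpuid) +
                              nent * sizeof(struct kvm_cpuid_entry2));
            cpuid->nent = nent;

            if (ioctl(kvm, KVM_GET_EMULATED_CPUID, cpuid) == 0)
                    for (unsigned i = 0; i < cpuid->nent; i++)
                            printf("leaf %#x index %#x\n",
                                   cpuid->entries[i].function,
                                   cpuid->entries[i].index);

            free(cpuid);
            return 0;
    }
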
@@ -2715,8 +2727,7 @@ static void wbinvd_ipi(void *garbage)
 
 static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
 {
-       return vcpu->kvm->arch.iommu_domain &&
-               !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY);
+       return kvm_arch_has_noncoherent_dma(vcpu->kvm);
 }
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -2984,11 +2995,13 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
                                         struct kvm_xsave *guest_xsave)
 {
-       if (cpu_has_xsave)
+       if (cpu_has_xsave) {
                memcpy(guest_xsave->region,
                        &vcpu->arch.guest_fpu.state->xsave,
-                       xstate_size);
-       else {
+                       vcpu->arch.guest_xstate_size);
+               *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &=
+                       vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE;
+       } else {
                memcpy(guest_xsave->region,
                        &vcpu->arch.guest_fpu.state->fxsave,
                        sizeof(struct i387_fxsave_struct));
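
The header indexing in the hunk above leans on the fixed XSAVE layout: the legacy FXSAVE image fills the first 512 bytes, so the 64-bit XSTATE_BV header field starts at byte offset 512, i.e. element 128 of the u32 region array. A standalone sketch of that arithmetic (little-endian x86 assumed; memcpy is used here instead of the kernel's pointer cast to sidestep aliasing questions):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    #define XSAVE_HDR_OFFSET 512    /* size of the legacy FXSAVE area */

    static uint64_t read_xstate_bv(const uint32_t *region)
    {
            /* XSTATE_BV lives immediately after the 512-byte legacy
             * image: u32 index 512 / 4 = 128. */
            uint64_t bv;

            memcpy(&bv, &region[XSAVE_HDR_OFFSET / sizeof(uint32_t)],
                   sizeof(bv));
            return bv;
    }

    int main(void)
    {
            uint32_t region[1024 / sizeof(uint32_t)] = { 0 };

            region[XSAVE_HDR_OFFSET / sizeof(uint32_t)] = 0x3;  /* FP | SSE */
            assert(read_xstate_bv(region) == 0x3);
            return 0;
    }
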
@@ -3003,10 +3016,19 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
        u64 xstate_bv =
                *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)];
 
-       if (cpu_has_xsave)
+       if (cpu_has_xsave) {
+               /*
+                * Here we allow setting states that are not present in
+                * CPUID leaf 0xD, index 0, EDX:EAX.  This is for compatibility
+                * with old userspace.
+                */
+               if (xstate_bv & ~KVM_SUPPORTED_XCR0)
+                       return -EINVAL;
+               if (xstate_bv & ~host_xcr0)
+                       return -EINVAL;
                memcpy(&vcpu->arch.guest_fpu.state->xsave,
-                       guest_xsave->region, xstate_size);
-       else {
+                       guest_xsave->region, vcpu->arch.guest_xstate_size);
+       } else {
                if (xstate_bv & ~XSTATE_FPSSE)
                        return -EINVAL;
                memcpy(&vcpu->arch.guest_fpu.state->fxsave,
@@ -3042,9 +3064,9 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
 
        for (i = 0; i < guest_xcrs->nr_xcrs; i++)
                /* Only support XCR0 currently */
-               if (guest_xcrs->xcrs[0].xcr == XCR_XFEATURE_ENABLED_MASK) {
+               if (guest_xcrs->xcrs[i].xcr == XCR_XFEATURE_ENABLED_MASK) {
                        r = __kvm_set_xcr(vcpu, XCR_XFEATURE_ENABLED_MASK,
-                               guest_xcrs->xcrs[0].value);
+                               guest_xcrs->xcrs[i].value);
                        break;
                }
        if (r)
@@ -4775,8 +4797,8 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
 
 static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
 {
-       memset(&ctxt->twobyte, 0,
-              (void *)&ctxt->_regs - (void *)&ctxt->twobyte);
+       memset(&ctxt->opcode_len, 0,
+              (void *)&ctxt->_regs - (void *)&ctxt->opcode_len);
 
        ctxt->fetch.start = 0;
        ctxt->fetch.end = 0;
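
The memset above clears every per-instruction decode field from opcode_len up to (but not including) _regs in one call; it works only because those members sit contiguously in struct x86_emulate_ctxt, which is why the window's start marker had to be renamed along with the field. A reduced sketch of the idiom, with a hypothetical struct standing in for the emulator context:

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical stand-in: the fields between 'first' and 'keep'
     * are per-instruction state that must be reset for each decode. */
    struct ctxt {
            int mode;          /* preserved: lies before the window */
            int first;         /* start of the zeroed window */
            int scratch[4];
            int last;
            int keep;          /* preserved: marks the window's end */
    };

    static void reset_decode_cache(struct ctxt *c)
    {
            /* Zero [&c->first, &c->keep): one memset instead of one
             * assignment per field; fragile if members are reordered. */
            memset(&c->first, 0, (char *)&c->keep - (char *)&c->first);
    }

    int main(void)
    {
            struct ctxt c = { .mode = 7, .first = 1, .last = 2, .keep = 9 };

            reset_decode_cache(&c);
            printf("%d %d %d %d\n", c.mode, c.first, c.last, c.keep); /* 7 0 0 9 */
            return 0;
    }
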
@@ -5094,8 +5116,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                ctxt->have_exception = false;
                ctxt->perm_ok = false;
 
-               ctxt->only_vendor_specific_insn
-                       = emulation_type & EMULTYPE_TRAP_UD;
+               ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
 
                r = x86_decode_insn(ctxt, insn, insn_len);
 
@@ -5263,7 +5284,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
 
        smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
 
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        if (vcpu->cpu != freq->cpu)
@@ -5273,7 +5294,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
                                send_ipi = 1;
                }
        }
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
 
        if (freq->old < freq->new && send_ipi) {
                /*
@@ -5426,12 +5447,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
        struct kvm_vcpu *vcpu;
        int i;
 
-       raw_spin_lock(&kvm_lock);
+       spin_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
                kvm_for_each_vcpu(i, vcpu, kvm)
                        set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
        atomic_set(&kvm_guest_has_master_clock, 0);
-       raw_spin_unlock(&kvm_lock);
+       spin_unlock(&kvm_lock);
 }
 
 static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
@@ -5945,10 +5966,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
        vcpu->mode = IN_GUEST_MODE;
 
+       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+
        /* We should set ->mode before check ->requests,
         * see the comment in make_all_cpus_request.
         */
-       smp_mb();
+       smp_mb__after_srcu_read_unlock();
 
        local_irq_disable();
 
@@ -5958,12 +5981,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                smp_wmb();
                local_irq_enable();
                preempt_enable();
+               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = 1;
                goto cancel_injection;
        }
 
-       srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-
        if (req_immediate_exit)
                smp_send_reschedule(vcpu->cpu);
 
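
This hunk is the commit named in the title: srcu_read_unlock() already executes a full smp_mb() internally, so hoisting the unlock above the ->mode/->requests ordering point lets the explicit barrier collapse into smp_mb__after_srcu_read_unlock(), which is a no-op. Roughly, the helper introduced for this (per include/linux/srcu.h of this era):

    /*
     * smp_mb__after_srcu_read_unlock - ensure full ordering after
     * srcu_read_unlock
     *
     * Converts the preceding srcu_read_unlock into a two-way memory
     * barrier.
     */
    static inline void smp_mb__after_srcu_read_unlock(void)
    {
            /* __srcu_read_unlock has smp_mb() internally so nothing
             * to do here. */
    }

The failed-entry path (the requests/need_resched bailout above) correspondingly re-takes srcu_read_lock() before returning, keeping the SRCU read side balanced.
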
@@ -6688,7 +6710,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
        if (r)
                return r;
        kvm_vcpu_reset(vcpu);
-       r = kvm_mmu_setup(vcpu);
+       kvm_mmu_setup(vcpu);
        vcpu_put(vcpu);
 
        return r;
@@ -6940,6 +6962,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
        vcpu->arch.ia32_tsc_adjust_msr = 0x0;
        vcpu->arch.pv_time_enabled = false;
+
+       vcpu->arch.guest_supported_xcr0 = 0;
+       vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+
        kvm_async_pf_hash_reset(vcpu);
        kvm_pmu_init(vcpu);
 
@@ -6981,6 +7007,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
        INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
        INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
+       atomic_set(&kvm->arch.noncoherent_dma_count, 0);
 
        /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
        set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
@@ -7065,7 +7092,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
        kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
 }
 
-void kvm_arch_free_memslot(struct kvm_memory_slot *free,
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
                           struct kvm_memory_slot *dont)
 {
        int i;
@@ -7086,7 +7113,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
        }
 }
 
-int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
+int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
+                           unsigned long npages)
 {
        int i;
 
@@ -7283,7 +7311,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
        int r;
 
        if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) ||
-             is_error_page(work->page))
+             work->wakeup_all)
                return;
 
        r = kvm_mmu_reload(vcpu);
@@ -7393,7 +7421,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
        struct x86_exception fault;
 
        trace_kvm_async_pf_ready(work->arch.token, work->gva);
-       if (is_error_page(work->page))
+       if (work->wakeup_all)
                work->arch.token = ~0; /* broadcast wakeup */
        else
                kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
@@ -7420,6 +7448,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
                        kvm_x86_ops->interrupt_allowed(vcpu);
 }
 
+void kvm_arch_register_noncoherent_dma(struct kvm *kvm)
+{
+       atomic_inc(&kvm->arch.noncoherent_dma_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma);
+
+void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm)
+{
+       atomic_dec(&kvm->arch.noncoherent_dma_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma);
+
+bool kvm_arch_has_noncoherent_dma(struct kvm *kvm)
+{
+       return atomic_read(&kvm->arch.noncoherent_dma_count);
+}
+EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
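
The three helpers above keep a count rather than a flag, so several assigned devices can register and unregister independently and need_emulate_wbinvd() (earlier hunk) stays true while any noncoherent device remains attached. A standalone C11 model of the pattern (the kernel uses atomic_t and its own API; the names here are illustrative):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct vm {
            atomic_int noncoherent_dma_count;   /* count, not a flag */
    };

    static void register_noncoherent_dma(struct vm *vm)
    {
            atomic_fetch_add(&vm->noncoherent_dma_count, 1);
    }

    static void unregister_noncoherent_dma(struct vm *vm)
    {
            atomic_fetch_sub(&vm->noncoherent_dma_count, 1);
    }

    static bool has_noncoherent_dma(struct vm *vm)
    {
            return atomic_load(&vm->noncoherent_dma_count) != 0;
    }

    int main(void)
    {
            struct vm vm = { 0 };

            register_noncoherent_dma(&vm);      /* first device attaches */
            register_noncoherent_dma(&vm);      /* second device attaches */
            unregister_noncoherent_dma(&vm);    /* one detaches */
            printf("%d\n", has_noncoherent_dma(&vm));   /* 1: still needed */
            return 0;
    }
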