Pileus Git - ~andy/linux/blobdiff - arch/x86/kvm/x86.c
Merge tag 'fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/arm...
[~andy/linux] / arch / x86 / kvm / x86.c
index 5d004da1e35da9bdad0260f5d6b0f287330b194c..2b8578432d5bccd296fa6d5859e3575c0fe4aa02 100644 (file)
@@ -94,6 +94,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
 static bool ignore_msrs = 0;
 module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
 
+unsigned int min_timer_period_us = 500;
+module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
+
 bool kvm_has_tsc_control;
 EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
 u32  kvm_max_guest_tsc_khz;
@@ -254,10 +257,26 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_apic_base);
 
-void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
-{
-       /* TODO: reserve bits check */
-       kvm_lapic_set_base(vcpu, data);
+int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+       u64 old_state = vcpu->arch.apic_base &
+               (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+       u64 new_state = msr_info->data &
+               (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+       u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) |
+               0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE);
+
+       if (!msr_info->host_initiated &&
+           ((msr_info->data & reserved_bits) != 0 ||
+            new_state == X2APIC_ENABLE ||
+            (new_state == MSR_IA32_APICBASE_ENABLE &&
+             old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) ||
+            (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) &&
+             old_state == 0)))
+               return 1;
+
+       kvm_lapic_set_base(vcpu, msr_info->data);
+       return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 
@@ -719,6 +738,12 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cr8);
 
+static void kvm_update_dr6(struct kvm_vcpu *vcpu)
+{
+       if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+               kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6);
+}
+
 static void kvm_update_dr7(struct kvm_vcpu *vcpu)
 {
        unsigned long dr7;
@@ -747,6 +772,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
                if (val & 0xffffffff00000000ULL)
                        return -1; /* #GP */
                vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
+               kvm_update_dr6(vcpu);
                break;
        case 5:
                if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
@@ -788,7 +814,10 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
                        return 1;
                /* fall through */
        case 6:
-               *val = vcpu->arch.dr6;
+               if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+                       *val = vcpu->arch.dr6;
+               else
+                       *val = kvm_x86_ops->get_dr6(vcpu);
                break;
        case 5:
                if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
@@ -836,11 +865,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
  * kvm-specific. Those are put in the beginning of the list.
  */
 
-#define KVM_SAVE_MSRS_BEGIN    10
+#define KVM_SAVE_MSRS_BEGIN    12
 static u32 msrs_to_save[] = {
        MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
        MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
        HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
+       HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
        HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
        MSR_KVM_PV_EOI_EN,
        MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
@@ -1275,8 +1305,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
        kvm->arch.last_tsc_write = data;
        kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
 
-       /* Reset of TSC must disable overshoot protection below */
-       vcpu->arch.hv_clock.tsc_timestamp = 0;
        vcpu->arch.last_guest_tsc = data;
 
        /* Keep track of which generation this VCPU has synchronized to */
@@ -1484,7 +1512,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
        unsigned long flags, this_tsc_khz;
        struct kvm_vcpu_arch *vcpu = &v->arch;
        struct kvm_arch *ka = &v->kvm->arch;
-       s64 kernel_ns, max_kernel_ns;
+       s64 kernel_ns;
        u64 tsc_timestamp, host_tsc;
        struct pvclock_vcpu_time_info guest_hv_clock;
        u8 pvclock_flags;
@@ -1543,37 +1571,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
        if (!vcpu->pv_time_enabled)
                return 0;
 
-       /*
-        * Time as measured by the TSC may go backwards when resetting the base
-        * tsc_timestamp.  The reason for this is that the TSC resolution is
-        * higher than the resolution of the other clock scales.  Thus, many
-        * possible measurments of the TSC correspond to one measurement of any
-        * other clock, and so a spread of values is possible.  This is not a
-        * problem for the computation of the nanosecond clock; with TSC rates
-        * around 1GHZ, there can only be a few cycles which correspond to one
-        * nanosecond value, and any path through this code will inevitably
-        * take longer than that.  However, with the kernel_ns value itself,
-        * the precision may be much lower, down to HZ granularity.  If the
-        * first sampling of TSC against kernel_ns ends in the low part of the
-        * range, and the second in the high end of the range, we can get:
-        *
-        * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new
-        *
-        * As the sampling errors potentially range in the thousands of cycles,
-        * it is possible such a time value has already been observed by the
-        * guest.  To protect against this, we must compute the system time as
-        * observed by the guest and ensure the new system time is greater.
-        */
-       max_kernel_ns = 0;
-       if (vcpu->hv_clock.tsc_timestamp) {
-               max_kernel_ns = vcpu->last_guest_tsc -
-                               vcpu->hv_clock.tsc_timestamp;
-               max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
-                                   vcpu->hv_clock.tsc_to_system_mul,
-                                   vcpu->hv_clock.tsc_shift);
-               max_kernel_ns += vcpu->last_kernel_ns;
-       }
-
        if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
                kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
                                   &vcpu->hv_clock.tsc_shift,
@@ -1581,14 +1578,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
                vcpu->hw_tsc_khz = this_tsc_khz;
        }
 
-       /* with a master <monotonic time, tsc value> tuple,
-        * pvclock clock reads always increase at the (scaled) rate
-        * of guest TSC - no need to deal with sampling errors.
-        */
-       if (!use_master_clock) {
-               if (max_kernel_ns > kernel_ns)
-                       kernel_ns = max_kernel_ns;
-       }
        /* With all the info we got, fill in the values */
        vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
        vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
@@ -1826,6 +1815,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
        switch (msr) {
        case HV_X64_MSR_GUEST_OS_ID:
        case HV_X64_MSR_HYPERCALL:
+       case HV_X64_MSR_REFERENCE_TSC:
+       case HV_X64_MSR_TIME_REF_COUNT:
                r = true;
                break;
        }
@@ -1865,6 +1856,21 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
                if (__copy_to_user((void __user *)addr, instructions, 4))
                        return 1;
                kvm->arch.hv_hypercall = data;
+               mark_page_dirty(kvm, gfn);
+               break;
+       }
+       case HV_X64_MSR_REFERENCE_TSC: {
+               u64 gfn;
+               HV_REFERENCE_TSC_PAGE tsc_ref;
+               memset(&tsc_ref, 0, sizeof(tsc_ref));
+               kvm->arch.hv_tsc_page = data;
+               if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+                       break;
+               gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+               if (kvm_write_guest(kvm, data,
+                       &tsc_ref, sizeof(tsc_ref)))
+                       return 1;
+               mark_page_dirty(kvm, gfn);
                break;
        }
        default:
@@ -1879,19 +1885,21 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
        switch (msr) {
        case HV_X64_MSR_APIC_ASSIST_PAGE: {
+               u64 gfn;
                unsigned long addr;
 
                if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
                        vcpu->arch.hv_vapic = data;
                        break;
                }
-               addr = gfn_to_hva(vcpu->kvm, data >>
-                                 HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT);
+               gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
+               addr = gfn_to_hva(vcpu->kvm, gfn);
                if (kvm_is_error_hva(addr))
                        return 1;
                if (__clear_user((void __user *)addr, PAGE_SIZE))
                        return 1;
                vcpu->arch.hv_vapic = data;
+               mark_page_dirty(vcpu->kvm, gfn);
                break;
        }
        case HV_X64_MSR_EOI:
@@ -2017,8 +2025,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case 0x200 ... 0x2ff:
                return set_msr_mtrr(vcpu, msr, data);
        case MSR_IA32_APICBASE:
-               kvm_set_apic_base(vcpu, data);
-               break;
+               return kvm_set_apic_base(vcpu, msr_info);
        case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff:
                return kvm_x2apic_msr_write(vcpu, msr, data);
        case MSR_IA32_TSCDEADLINE:
@@ -2291,6 +2298,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
        case HV_X64_MSR_HYPERCALL:
                data = kvm->arch.hv_hypercall;
                break;
+       case HV_X64_MSR_TIME_REF_COUNT: {
+               data =
+                    div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
+               break;
+       }
+       case HV_X64_MSR_REFERENCE_TSC:
+               data = kvm->arch.hv_tsc_page;
+               break;
        default:
                vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
                return 1;
@@ -2601,6 +2616,7 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_GET_TSC_KHZ:
        case KVM_CAP_KVMCLOCK_CTRL:
        case KVM_CAP_READONLY_MEM:
+       case KVM_CAP_HYPERV_TIME:
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
        case KVM_CAP_ASSIGN_DEV_IRQ:
        case KVM_CAP_PCI_2_3:
@@ -2972,8 +2988,11 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
                                             struct kvm_debugregs *dbgregs)
 {
+       unsigned long val;
+
        memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
-       dbgregs->dr6 = vcpu->arch.dr6;
+       _kvm_get_dr(vcpu, 6, &val);
+       dbgregs->dr6 = val;
        dbgregs->dr7 = vcpu->arch.dr7;
        dbgregs->flags = 0;
        memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
@@ -2987,7 +3006,9 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
 
        memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
        vcpu->arch.dr6 = dbgregs->dr6;
+       kvm_update_dr6(vcpu);
        vcpu->arch.dr7 = dbgregs->dr7;
+       kvm_update_dr7(vcpu);
 
        return 0;
 }
@@ -5834,6 +5855,11 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
        kvm_apic_update_tmr(vcpu, tmr);
 }
 
+/*
+ * Returns 1 to let __vcpu_run() continue the guest execution loop without
+ * exiting to the userspace.  Otherwise, the value will be returned to the
+ * userspace.
+ */
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
        int r;
@@ -6089,7 +6115,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                }
                if (need_resched()) {
                        srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
-                       kvm_resched(vcpu);
+                       cond_resched();
                        vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
                }
        }
@@ -6160,7 +6186,7 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
                frag->len -= len;
        }
 
-       if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
+       if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) {
                vcpu->mmio_needed = 0;
 
                /* FIXME: return into emulator if single-stepping.  */
@@ -6401,6 +6427,7 @@ EXPORT_SYMBOL_GPL(kvm_task_switch);
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
 {
+       struct msr_data apic_base_msr;
        int mmu_reset_needed = 0;
        int pending_vec, max_bits, idx;
        struct desc_ptr dt;
@@ -6424,7 +6451,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
        mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
        kvm_x86_ops->set_efer(vcpu, sregs->efer);
-       kvm_set_apic_base(vcpu, sregs->apic_base);
+       apic_base_msr.data = sregs->apic_base;
+       apic_base_msr.host_initiated = true;
+       kvm_set_apic_base(vcpu, &apic_base_msr);
 
        mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
        kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
@@ -6717,6 +6746,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
 
        memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
        vcpu->arch.dr6 = DR6_FIXED_1;
+       kvm_update_dr6(vcpu);
        vcpu->arch.dr7 = DR7_FIXED_1;
        kvm_update_dr7(vcpu);