static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
{
- return kvm_memslots(kvm)->generation & MMIO_GEN_MASK;
+ /*
+ * Init kvm generation close to MMIO_MAX_GEN to easily test the
+ * code of handling generation number wrap-around.
+ */
+ return (kvm_memslots(kvm)->generation +
+ MMIO_MAX_GEN - 150) & MMIO_GEN_MASK;
}
static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
unsigned access)
{
- struct kvm_mmu_page *sp = page_header(__pa(sptep));
unsigned int gen = kvm_current_mmio_generation(kvm);
u64 mask = generation_mmio_spte_mask(gen);
access &= ACC_WRITE_MASK | ACC_USER_MASK;
mask |= shadow_mmio_mask | access | gfn << PAGE_SHIFT;
- sp->mmio_cached = true;
trace_mark_mmio_spte(sptep, gfn, access, gen);
mmu_spte_set(sptep, mask);
/*
* The idea using the light way get the spte on x86_32 guest is from
* gup_get_pte(arch/x86/mm/gup.c).
- * The difference is we can not catch the spte tlb flush if we leave
- * guest mode, so we emulate it by increase clear_spte_count when spte
- * is cleared.
+ *
+ * An spte tlb flush may be pending, because kvm_set_pte_rmapp
+ * coalesces them and we are running out of the MMU lock. Therefore
+ * we need to protect against in-progress updates of the spte.
+ *
+ * Reading the spte while an update is in progress may get the old value
+ * for the high part of the spte. The race is fine for a present->non-present
+ * change (because the high part of the spte is ignored for non-present spte),
+ * but for a present->present change we must reread the spte.
+ *
+ * All such changes are done in two steps (present->non-present and
+ * non-present->present), hence it is enough to count the number of
+ * present->non-present updates: if it changed while reading the spte,
+ * we might have hit the race. This is done using clear_spte_count.
*/
static u64 __get_spte_lockless(u64 *sptep)
{
spin_unlock(&kvm->mmu_lock);
}
-static void kvm_mmu_zap_mmio_sptes(struct kvm *kvm)
-{
- struct kvm_mmu_page *sp, *node;
- LIST_HEAD(invalid_list);
-
- spin_lock(&kvm->mmu_lock);
-restart:
- list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
- if (!sp->mmio_cached)
- continue;
- if (kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list))
- goto restart;
- }
-
- kvm_mmu_commit_zap_page(kvm, &invalid_list);
- spin_unlock(&kvm->mmu_lock);
-}
-
static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
{
return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
* The max value is MMIO_MAX_GEN - 1 since it is not called
* when mark memslot invalid.
*/
- if (unlikely(kvm_current_mmio_generation(kvm) >= (MMIO_MAX_GEN - 1)))
- kvm_mmu_zap_mmio_sptes(kvm);
+ if (unlikely(kvm_current_mmio_generation(kvm) >= (MMIO_MAX_GEN - 1))) {
+ printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n");
+ kvm_mmu_invalidate_zap_all_pages(kvm);
+ }
}
static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)