#include "vmx.h"
#include "kvm.h"
-#define pgprintk(x...) do { printk(x); } while (0)
-#define rmap_printk(x...) do { printk(x); } while (0)
+#undef MMU_DEBUG	/* change to #define to make pgprintk()/rmap_printk() call printk() */
+
+#undef AUDIT	/* change to #define to compile in the real kvm_mmu_audit() */
+
+#ifdef AUDIT
+static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg);
+#else
+static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}	/* empty stub when AUDIT is off */
+#endif
+
+#ifdef MMU_DEBUG
+
+#define pgprintk(x...) do { if (dbg) printk(x); } while (0)	/* prints only while dbg is nonzero */
+#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)	/* prints only while dbg is nonzero */
+
+#else
+
+#define pgprintk(x...) do { } while (0)	/* expands to an empty statement */
+#define rmap_printk(x...) do { } while (0)	/* expands to an empty statement */
+
+#endif
+
+#if defined(MMU_DEBUG) || defined(AUDIT)
+static int dbg = 1;	/* run-time gate for the debug printks; kvm_mmu_audit() clears it while auditing */
+#endif
#define ASSERT(x) \
if (!(x)) { \
#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)
+#define PFERR_FETCH_MASK (1U << 4)
#define PT64_ROOT_LEVEL 4
#define PT32_ROOT_LEVEL 2
return 1;
}
+/*
+ * Report whether the EFER_NX bit is set in the vcpu's shadow EFER.
+ * The result is the masked value itself (nonzero when set), not 0/1.
+ */
+static int is_nx(struct kvm_vcpu *vcpu)
+{
+	int nx_bit = vcpu->shadow_efer & EFER_NX;
+
+	return nx_bit;
+}
+
static int is_present_pte(unsigned long pte)
{
return pte & PT_PRESENT_MASK;
if (!is_rmap_pte(*spte))
return;
page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
- if (!page->private) {
+ if (!page_private(page)) {
rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
- page->private = (unsigned long)spte;
- } else if (!(page->private & 1)) {
+ set_page_private(page,(unsigned long)spte);
+ } else if (!(page_private(page) & 1)) {
rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
desc = mmu_alloc_rmap_desc(vcpu);
- desc->shadow_ptes[0] = (u64 *)page->private;
+ desc->shadow_ptes[0] = (u64 *)page_private(page);
desc->shadow_ptes[1] = spte;
- page->private = (unsigned long)desc | 1;
+ set_page_private(page,(unsigned long)desc | 1);
} else {
rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
- desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+ desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
desc = desc->more;
if (desc->shadow_ptes[RMAP_EXT-1]) {
for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j)
;
desc->shadow_ptes[i] = desc->shadow_ptes[j];
- desc->shadow_ptes[j] = 0;
+ desc->shadow_ptes[j] = NULL;
if (j != 0)
return;
if (!prev_desc && !desc->more)
- page->private = (unsigned long)desc->shadow_ptes[0];
+ set_page_private(page,(unsigned long)desc->shadow_ptes[0]);
else
if (prev_desc)
prev_desc->more = desc->more;
else
- page->private = (unsigned long)desc->more | 1;
+ set_page_private(page,(unsigned long)desc->more | 1);
mmu_free_rmap_desc(vcpu, desc);
}
if (!is_rmap_pte(*spte))
return;
page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
- if (!page->private) {
+ if (!page_private(page)) {
printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
BUG();
- } else if (!(page->private & 1)) {
+ } else if (!(page_private(page) & 1)) {
rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
- if ((u64 *)page->private != spte) {
+ if ((u64 *)page_private(page) != spte) {
printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
spte, *spte);
BUG();
}
- page->private = 0;
+ set_page_private(page,0);
} else {
rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
- desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+ desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
prev_desc = NULL;
while (desc) {
for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
BUG_ON(!slot);
page = gfn_to_page(slot, gfn);
- while (page->private) {
- if (!(page->private & 1))
- spte = (u64 *)page->private;
+ while (page_private(page)) {
+ if (!(page_private(page) & 1))
+ spte = (u64 *)page_private(page);
else {
- desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
+ desc = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
spte = desc->shadow_ptes[0];
}
BUG_ON(!spte);
BUG_ON(!(*spte & PT_WRITABLE_MASK));
rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
rmap_remove(vcpu, spte);
+ kvm_arch_ops->tlb_flush(vcpu);
*spte &= ~(u64)PT_WRITABLE_MASK;
}
}
rmap_remove(vcpu, &pt[i]);
pt[i] = 0;
}
+ kvm_arch_ops->tlb_flush(vcpu);
return;
}
hpa_t root = vcpu->mmu.root_hpa;
ASSERT(!VALID_PAGE(root));
- root = kvm_mmu_get_page(vcpu, root_gfn, 0,
- PT64_ROOT_LEVEL, 0, NULL)->page_hpa;
- page = page_header(root);
+ page = kvm_mmu_get_page(vcpu, root_gfn, 0,
+ PT64_ROOT_LEVEL, 0, NULL);
+ root = page->page_hpa;
++page->root_count;
vcpu->mmu.root_hpa = root;
return;
root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT;
else if (vcpu->mmu.root_level == 0)
root_gfn = 0;
- root = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
+ page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
PT32_ROOT_LEVEL, !is_paging(vcpu),
- NULL)->page_hpa;
- page = page_header(root);
+ NULL);
+ root = page->page_hpa;
++page->root_count;
vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
}
{
pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
mmu_free_roots(vcpu);
+ if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
+ kvm_mmu_free_some_pages(vcpu);
mmu_alloc_roots(vcpu);
kvm_mmu_flush_tlb(vcpu);
kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
pgprintk("%s: found shadow page for %lx, marking ro\n",
__FUNCTION__, gfn);
access_bits &= ~PT_WRITABLE_MASK;
- *shadow_pte &= ~PT_WRITABLE_MASK;
+ if (is_writeble_pte(*shadow_pte)) {
+ *shadow_pte &= ~PT_WRITABLE_MASK;
+ kvm_arch_ops->tlb_flush(vcpu);
+ }
}
}
return 0;
}
-static int may_access(u64 pte, int write, int user)
-{
-
- if (user && !(pte & PT_USER_MASK))
- return 0;
- if (write && !(pte & PT_WRITABLE_MASK))
- return 0;
- return 1;
-}
-
static void paging_free(struct kvm_vcpu *vcpu)
{
nonpaging_free(vcpu);
INIT_LIST_HEAD(&page_header->link);
if ((page = alloc_page(GFP_KERNEL)) == NULL)
goto error_1;
- page->private = (unsigned long)page_header;
+ set_page_private(page, (unsigned long)page_header);
page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT;
memset(__va(page_header->page_hpa), 0, PAGE_SIZE);
list_add(&page_header->link, &vcpu->free_pages);
}
}
}
+
+#ifdef AUDIT
+
+static const char *audit_msg;
+
+/*
+ * On CONFIG_X86_64 builds, replace the top 16 bits of gva with copies of
+ * bit 47 (shift left by 16, then arithmetic shift right by 16).
+ * On other builds the address is returned unchanged.
+ */
+static gva_t canonicalize(gva_t gva)
+{
+#ifdef CONFIG_X86_64
+	long long shifted = (long long)(gva << 16);
+
+	gva = shifted >> 16;
+#endif
+	return gva;
+}
+
+/*
+ * Walk one shadow page table page and check each present leaf entry
+ * against the guest's own translation.
+ *
+ * @vcpu:     vcpu whose mmu.gva_to_gpa() is used for the guest walk.
+ * @page_pte: entry (or root address) whose PT64_BASE_ADDR_MASK bits locate
+ *            the table to scan; the table is accessed through __va().
+ * @va:       guest virtual address covered by the table's first entry.
+ * @level:    level of this table; at level 1 the entries are leaves,
+ *            above that the function recurses into each present entry.
+ *
+ * For every leaf, va is translated with vcpu->mmu.gva_to_gpa() followed by
+ * gpa_to_hpa(); if the result differs from the address stored in the
+ * shadow entry, an "audit error" line is printed with printk(KERN_ERR).
+ */
+static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
+				gva_t va, int level)
+{
+	u64 *pt = __va(page_pte & PT64_BASE_ADDR_MASK);
+	int i;
+	gva_t va_delta = 1ul << (PAGE_SHIFT + 9 * (level - 1));
+
+	for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
+		u64 ent = pt[i];
+
+		/*
+		 * The parentheses matter: "!ent & PT_PRESENT_MASK" parses as
+		 * "(!ent) & PT_PRESENT_MASK", which only skips entries that
+		 * are entirely zero and lets non-present entries with other
+		 * bits set fall through to the walk below.
+		 */
+		if (!(ent & PT_PRESENT_MASK))
+			continue;
+
+		va = canonicalize(va);
+		if (level > 1) {
+			audit_mappings_page(vcpu, ent, va, level - 1);
+		} else {
+			gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va);
+			hpa_t hpa = gpa_to_hpa(vcpu, gpa);
+
+			/* ent is known to be present here */
+			if ((ent & PT64_BASE_ADDR_MASK) != hpa)
+				printk(KERN_ERR "audit error: (%s) levels %d"
+				       " gva %lx gpa %llx hpa %llx ent %llx\n",
+				       audit_msg, vcpu->mmu.root_level,
+				       va, gpa, hpa, ent);
+		}
+	}
+}
+
+/*
+ * Audit every shadow mapping reachable from the vcpu's current root(s).
+ *
+ * When vcpu->mmu.root_level is 4 the single root at vcpu->mmu.root_hpa is
+ * walked as a level-4 table starting at address 0.  Otherwise each present
+ * entry of vcpu->mmu.pae_root[] is walked as a level-2 table, with entry i
+ * starting at guest virtual address i << 30.
+ */
+static void audit_mappings(struct kvm_vcpu *vcpu)
+{
+	int i;
+
+	if (vcpu->mmu.root_level == 4) {
+		audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);
+		return;
+	}
+
+	for (i = 0; i < 4; ++i) {
+		if (!(vcpu->mmu.pae_root[i] & PT_PRESENT_MASK))
+			continue;
+		/*
+		 * Widen before shifting: with a plain int, 2 << 30 and
+		 * 3 << 30 overflow, which is undefined and in practice
+		 * yields a sign-extended address when gva_t is 64 bits.
+		 */
+		audit_mappings_page(vcpu, vcpu->mmu.pae_root[i],
+				    (gva_t)i << 30, 2);
+	}
+}
+
+/*
+ * Count the reverse-map entries recorded on all guest pages.
+ *
+ * Every page of every memory slot of vcpu->kvm is inspected through its
+ * page_private() value, decoded the same way as in the rmap code:
+ *   0           - no entry;
+ *   bit 0 clear - the value is a single spte pointer (one entry);
+ *   bit 0 set   - the value, with bit 0 masked off, points to a chain of
+ *                 struct kvm_rmap_desc; each descriptor contributes its
+ *                 leading non-NULL shadow_ptes[] slots.
+ *
+ * Returns the total number of entries found.
+ */
+static int count_rmaps(struct kvm_vcpu *vcpu)
+{
+	int nmaps = 0;
+	int i, j, k;
+
+	for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
+		struct kvm_memory_slot *m = &vcpu->kvm->memslots[i];
+		struct kvm_rmap_desc *d;
+
+		for (j = 0; j < m->npages; ++j) {
+			struct page *page = m->phys_mem[j];
+
+			/* use the accessor, like the rest of the rmap code */
+			if (!page_private(page))
+				continue;
+			if (!(page_private(page) & 1)) {
+				++nmaps;
+				continue;
+			}
+			d = (struct kvm_rmap_desc *)(page_private(page) & ~1ul);
+			while (d) {
+				/* slots are filled from index 0; stop at the first hole */
+				for (k = 0; k < RMAP_EXT; ++k) {
+					if (!d->shadow_ptes[k])
+						break;
+					++nmaps;
+				}
+				d = d->more;
+			}
+		}
+	}
+	return nmaps;
+}
+
+/*
+ * Count shadow ptes that are both present and writable, looking only at
+ * pages on vcpu->kvm->active_mmu_pages whose role.level equals
+ * PT_PAGE_TABLE_LEVEL.
+ *
+ * Returns the number of such entries.
+ */
+static int count_writable_mappings(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmu_page *sp;
+	int total = 0;
+	int idx;
+
+	list_for_each_entry(sp, &vcpu->kvm->active_mmu_pages, link) {
+		u64 *sptes = __va(sp->page_hpa);
+
+		if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
+			for (idx = 0; idx < PT64_ENT_PER_PAGE; ++idx) {
+				u64 spte = sptes[idx];
+
+				if ((spte & PT_PRESENT_MASK) &&
+				    (spte & PT_WRITABLE_MASK))
+					++total;
+			}
+		}
+	}
+	return total;
+}
+
+/*
+ * Compare the number of reverse-map entries (count_rmaps()) with the
+ * number of writable shadow ptes (count_writable_mappings()) and log a
+ * KERN_ERR line when the two differ.
+ */
+static void audit_rmap(struct kvm_vcpu *vcpu)
+{
+	int from_rmap = count_rmaps(vcpu);
+	int from_ptes = count_writable_mappings(vcpu);
+
+	if (from_rmap == from_ptes)
+		return;
+
+	printk(KERN_ERR "%s: (%s) rmap %d actual %d\n",
+	       __FUNCTION__, audit_msg, from_rmap, from_ptes);
+}
+
+/*
+ * Check that guest pages which are themselves shadowed carry no
+ * reverse-map entries.
+ *
+ * For each page on vcpu->kvm->active_mmu_pages whose role.metaphysical is
+ * clear, the guest frame (gfn) is translated with gpa_to_hpa() and the
+ * backing struct page is looked up.  A nonzero page_private() value on
+ * that page is reported with printk(KERN_ERR).
+ */
+static void audit_write_protection(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmu_page *page;
+
+	list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
+		hfn_t hfn;
+		struct page *pg;
+
+		if (page->role.metaphysical)
+			continue;
+
+		hfn = gpa_to_hpa(vcpu, (gpa_t)page->gfn << PAGE_SHIFT)
+			>> PAGE_SHIFT;
+		pg = pfn_to_page(hfn);
+		/* use the accessor, like the rest of the rmap code */
+		if (page_private(pg))
+			printk(KERN_ERR "%s: (%s) shadow page has writable"
+			       " mappings: gfn %lx role %x\n",
+			       __FUNCTION__, audit_msg, page->gfn,
+			       page->role.word);
+	}
+}
+
+/*
+ * Run all shadow MMU consistency checks for a vcpu.
+ *
+ * @msg is stored in audit_msg so the individual checks can include it in
+ * their error output.  The dbg flag is forced to 0 for the duration of
+ * the checks and restored to its previous value afterwards.
+ */
+static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg)
+{
+	const int saved_dbg = dbg;
+
+	audit_msg = msg;
+	dbg = 0;
+
+	audit_rmap(vcpu);
+	audit_write_protection(vcpu);
+	audit_mappings(vcpu);
+
+	dbg = saved_dbg;
+}
+
+#endif