thp: optimize away unnecessary page table locking
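
Several paths in mm/huge_memory.c open-code the same sequence: take
mm->page_table_lock, check pmd_trans_huge(), and special-case a pmd that
is under splitting. This patch factors that sequence into a new helper,
__pmd_trans_huge_lock(), and converts zap_huge_pmd(), mincore_huge_pmd(),
move_huge_pmd() and change_huge_pmd() to it, so each caller's fast path
reduces to a single check on the helper's return value.
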
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8f7fc394f63672954cc4444da0f2158a42470b2c..f0e5306eeb55e8e179da3abbe6c033045b6ad073 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1031,32 +1031,23 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 {
        int ret = 0;
 
-       spin_lock(&tlb->mm->page_table_lock);
-       if (likely(pmd_trans_huge(*pmd))) {
-               if (unlikely(pmd_trans_splitting(*pmd))) {
-                       spin_unlock(&tlb->mm->page_table_lock);
-                       wait_split_huge_page(vma->anon_vma,
-                                            pmd);
-               } else {
-                       struct page *page;
-                       pgtable_t pgtable;
-                       pgtable = get_pmd_huge_pte(tlb->mm);
-                       page = pmd_page(*pmd);
-                       pmd_clear(pmd);
-                       tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
-                       page_remove_rmap(page);
-                       VM_BUG_ON(page_mapcount(page) < 0);
-                       add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
-                       VM_BUG_ON(!PageHead(page));
-                       tlb->mm->nr_ptes--;
-                       spin_unlock(&tlb->mm->page_table_lock);
-                       tlb_remove_page(tlb, page);
-                       pte_free(tlb->mm, pgtable);
-                       ret = 1;
-               }
-       } else
+       if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+               struct page *page;
+               pgtable_t pgtable;
+               pgtable = get_pmd_huge_pte(tlb->mm);
+               page = pmd_page(*pmd);
+               pmd_clear(pmd);
+               tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
+               page_remove_rmap(page);
+               VM_BUG_ON(page_mapcount(page) < 0);
+               add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
+               VM_BUG_ON(!PageHead(page));
+               tlb->mm->nr_ptes--;
                spin_unlock(&tlb->mm->page_table_lock);
-
+               tlb_remove_page(tlb, page);
+               pte_free(tlb->mm, pgtable);
+               ret = 1;
+       }
        return ret;
 }
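
Only the stable-thp case (helper returns 1) is handled here: both a thp
under splitting (-1, where the helper has already waited for the split to
finish) and a non-huge pmd (0) leave ret at 0, so the caller falls back
to zapping the range as regular ptes.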
 
@@ -1066,21 +1057,15 @@ int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 {
        int ret = 0;
 
-       spin_lock(&vma->vm_mm->page_table_lock);
-       if (likely(pmd_trans_huge(*pmd))) {
-               ret = !pmd_trans_splitting(*pmd);
-               spin_unlock(&vma->vm_mm->page_table_lock);
-               if (unlikely(!ret))
-                       wait_split_huge_page(vma->anon_vma, pmd);
-               else {
-                       /*
-                        * All logical pages in the range are present
-                        * if backed by a huge page.
-                        */
-                       memset(vec, 1, (end - addr) >> PAGE_SHIFT);
-               }
-       } else
+       if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+               /*
+                * All logical pages in the range are present
+                * if backed by a huge page.
+                */
                spin_unlock(&vma->vm_mm->page_table_lock);
+               memset(vec, 1, (end - addr) >> PAGE_SHIFT);
+               ret = 1;
+       }
 
        return ret;
 }
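
With a stable thp, every logical page in [addr, end) is present, so the
vector is filled unconditionally; (end - addr) >> PAGE_SHIFT is the number
of base pages covered, for example 512 entries for a 2MB thp on 4KB base
pages.
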
@@ -1110,20 +1095,11 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
                goto out;
        }
 
-       spin_lock(&mm->page_table_lock);
-       if (likely(pmd_trans_huge(*old_pmd))) {
-               if (pmd_trans_splitting(*old_pmd)) {
-                       spin_unlock(&mm->page_table_lock);
-                       wait_split_huge_page(vma->anon_vma, old_pmd);
-                       ret = -1;
-               } else {
-                       pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
-                       VM_BUG_ON(!pmd_none(*new_pmd));
-                       set_pmd_at(mm, new_addr, new_pmd, pmd);
-                       spin_unlock(&mm->page_table_lock);
-                       ret = 1;
-               }
-       } else {
+       ret = __pmd_trans_huge_lock(old_pmd, vma);
+       if (ret == 1) {
+               pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
+               VM_BUG_ON(!pmd_none(*new_pmd));
+               set_pmd_at(mm, new_addr, new_pmd, pmd);
                spin_unlock(&mm->page_table_lock);
        }
 out:
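
move_huge_pmd() is the one conversion that passes the helper's return
value straight through, so its caller can still distinguish a moved thp
(1) from one that was under splitting (-1, already waited for) and from
a non-huge pmd (0).
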
@@ -1136,24 +1112,41 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
        struct mm_struct *mm = vma->vm_mm;
        int ret = 0;
 
-       spin_lock(&mm->page_table_lock);
+       if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+               pmd_t entry;
+               entry = pmdp_get_and_clear(mm, addr, pmd);
+               entry = pmd_modify(entry, newprot);
+               set_pmd_at(mm, addr, pmd, entry);
+               spin_unlock(&vma->vm_mm->page_table_lock);
+               ret = 1;
+       }
+
+       return ret;
+}
+
+/*
+ * Returns 1 if a given pmd maps a stable (not under splitting) thp.
+ * Returns -1 if it maps a thp under splitting. Returns 0 otherwise.
+ *
+ * Note that if it returns 1, this routine returns without unlocking page
+ * table locks. So callers must unlock them.
+ */
+int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
+{
+       spin_lock(&vma->vm_mm->page_table_lock);
        if (likely(pmd_trans_huge(*pmd))) {
                if (unlikely(pmd_trans_splitting(*pmd))) {
-                       spin_unlock(&mm->page_table_lock);
+                       spin_unlock(&vma->vm_mm->page_table_lock);
                        wait_split_huge_page(vma->anon_vma, pmd);
+                       return -1;
                } else {
-                       pmd_t entry;
-
-                       entry = pmdp_get_and_clear(mm, addr, pmd);
-                       entry = pmd_modify(entry, newprot);
-                       set_pmd_at(mm, addr, pmd, entry);
-                       spin_unlock(&vma->vm_mm->page_table_lock);
-                       ret = 1;
+                       /* Thp mapped by 'pmd' is stable, so we can
+                        * handle it as it is. */
+                       return 1;
                }
-       } else
-               spin_unlock(&vma->vm_mm->page_table_lock);
-
-       return ret;
+       }
+       spin_unlock(&vma->vm_mm->page_table_lock);
+       return 0;
 }
 
 pmd_t *page_check_address_pmd(struct page *page,
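
The block comment above states the asymmetric contract of the new helper:
when it returns 1 the page table lock is still held and the caller must
drop it; on -1 and 0 the lock has already been released. A minimal sketch
of a conforming caller follows; example_handle_pmd() is hypothetical and
only illustrates the contract, it is not part of the patch.

static int example_handle_pmd(struct vm_area_struct *vma, pmd_t *pmd)
{
	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
		/*
		 * Stable thp: page_table_lock is still held, so the pmd
		 * cannot start splitting under us.  Do the huge-pmd work,
		 * then unlock; the helper deliberately does not.
		 */
		spin_unlock(&vma->vm_mm->page_table_lock);
		return 1;
	}
	/*
	 * -1: the thp was under splitting and the helper has already
	 * waited for the split to finish; 0: not a huge pmd.  Either way
	 * the lock has been dropped and the range should now be handled
	 * pte by pte.
	 */
	return 0;
}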