X-Git-Url: http://pileus.org/git/?a=blobdiff_plain;f=mm%2Fhuge_memory.c;h=e187454d82f666a70bab08762ca92cd60b8f848c;hb=08b018327c2e8412fd76f821e9bb9de36ef48cb1;hp=30c3cec8202396a0cc223361f999fc094083b7ab;hpb=13ece886d99cd668483113f7238e419d5331af26;p=~andy%2Flinux diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 30c3cec8202..e187454d82f 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include #include "internal.h" @@ -487,7 +489,15 @@ static int __init hugepage_init(void) int err; #ifdef CONFIG_SYSFS static struct kobject *hugepage_kobj; +#endif + err = -EINVAL; + if (!has_transparent_hugepage()) { + transparent_hugepage_flags = 0; + goto out; + } + +#ifdef CONFIG_SYSFS err = -ENOMEM; hugepage_kobj = kobject_create_and_add("transparent_hugepage", mm_kobj); if (unlikely(!hugepage_kobj)) { @@ -518,6 +528,14 @@ static int __init hugepage_init(void) goto out; } + /* + * By default disable transparent hugepages on smaller systems, + * where the extra memory used could hurt more than TLB overhead + * is likely to save. The admin can still enable it through /sys. + */ + if (totalram_pages < (512 << (20 - PAGE_SHIFT))) + transparent_hugepage_flags = 0; + start_khugepaged(); set_recommended_min_free_kbytes(); @@ -1075,8 +1093,16 @@ pmd_t *page_check_address_pmd(struct page *page, goto out; if (pmd_page(*pmd) != page) goto out; - VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG && - pmd_trans_splitting(*pmd)); + /* + * split_vma() may create temporary aliased mappings. There is + * no risk as long as all huge pmd are found and have their + * splitting bit set before __split_huge_page_refcount + * runs. Finding the same huge pmd more than once during the + * same rmap walk is not a problem. + */ + if (flag == PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG && + pmd_trans_splitting(*pmd)) + goto out; if (pmd_trans_huge(*pmd)) { VM_BUG_ON(flag == PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG && !pmd_trans_splitting(*pmd)); @@ -1118,6 +1144,7 @@ static void __split_huge_page_refcount(struct page *page) int i; unsigned long head_index = page->index; struct zone *zone = page_zone(page); + int zonestat; /* prevent PageLRU to go away from under us, and freeze lru stats */ spin_lock_irq(&zone->lru_lock); @@ -1176,12 +1203,23 @@ static void __split_huge_page_refcount(struct page *page) BUG_ON(!PageDirty(page_tail)); BUG_ON(!PageSwapBacked(page_tail)); + mem_cgroup_split_huge_fixup(page, page_tail); + lru_add_page_tail(zone, page, page_tail); } __dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES); __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR); + /* + * A hugepage counts for HPAGE_PMD_NR pages on the LRU statistics, + * so adjust those appropriately if this page is on the LRU. + */ + if (PageLRU(page)) { + zonestat = NR_LRU_BASE + page_lru(page); + __mod_zone_page_state(zone, zonestat, -(HPAGE_PMD_NR-1)); + } + ClearPageCompound(page); compound_unlock(page); spin_unlock_irq(&zone->lru_lock); @@ -1353,18 +1391,49 @@ out: return ret; } -int hugepage_madvise(unsigned long *vm_flags) +int hugepage_madvise(struct vm_area_struct *vma, + unsigned long *vm_flags, int advice) { - /* - * Be somewhat over-protective like KSM for now! - */ - if (*vm_flags & (VM_HUGEPAGE | VM_SHARED | VM_MAYSHARE | - VM_PFNMAP | VM_IO | VM_DONTEXPAND | - VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | - VM_MIXEDMAP | VM_SAO)) - return -EINVAL; - - *vm_flags |= VM_HUGEPAGE; + switch (advice) { + case MADV_HUGEPAGE: + /* + * Be somewhat over-protective like KSM for now! + */ + if (*vm_flags & (VM_HUGEPAGE | + VM_SHARED | VM_MAYSHARE | + VM_PFNMAP | VM_IO | VM_DONTEXPAND | + VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | + VM_MIXEDMAP | VM_SAO)) + return -EINVAL; + *vm_flags &= ~VM_NOHUGEPAGE; + *vm_flags |= VM_HUGEPAGE; + /* + * If the vma become good for khugepaged to scan, + * register it here without waiting a page fault that + * may not happen any time soon. + */ + if (unlikely(khugepaged_enter_vma_merge(vma))) + return -ENOMEM; + break; + case MADV_NOHUGEPAGE: + /* + * Be somewhat over-protective like KSM for now! + */ + if (*vm_flags & (VM_NOHUGEPAGE | + VM_SHARED | VM_MAYSHARE | + VM_PFNMAP | VM_IO | VM_DONTEXPAND | + VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | + VM_MIXEDMAP | VM_SAO)) + return -EINVAL; + *vm_flags &= ~VM_HUGEPAGE; + *vm_flags |= VM_NOHUGEPAGE; + /* + * Setting VM_NOHUGEPAGE will prevent khugepaged from scanning + * this vma even if we leave the mm registered in khugepaged if + * it got registered before VM_NOHUGEPAGE was set. + */ + break; + } return 0; } @@ -1616,7 +1685,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, VM_BUG_ON(PageLRU(page)); /* If there is no mapped pte young don't collapse the page */ - if (pte_young(pteval)) + if (pte_young(pteval) || PageReferenced(page) || + mmu_notifier_test_young(vma->vm_mm, address)) referenced = 1; } if (unlikely(!referenced)) @@ -1729,7 +1799,8 @@ static void collapse_huge_page(struct mm_struct *mm, if (address < hstart || address + HPAGE_PMD_SIZE > hend) goto out; - if (!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) + if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) || + (vma->vm_flags & VM_NOHUGEPAGE)) goto out; /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ @@ -1768,9 +1839,9 @@ static void collapse_huge_page(struct mm_struct *mm, spin_lock(ptl); isolated = __collapse_huge_page_isolate(vma, address, pte); spin_unlock(ptl); - pte_unmap(pte); if (unlikely(!isolated)) { + pte_unmap(pte); spin_lock(&mm->page_table_lock); BUG_ON(!pmd_none(*pmd)); set_pmd_at(mm, address, pmd, _pmd); @@ -1787,6 +1858,7 @@ static void collapse_huge_page(struct mm_struct *mm, anon_vma_unlock(vma->anon_vma); __collapse_huge_page_copy(pte, new_page, vma, address, ptl); + pte_unmap(pte); __SetPageUptodate(new_page); pgtable = pmd_pgtable(_pmd); VM_BUG_ON(page_count(pgtable) != 1); @@ -1876,7 +1948,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, /* cannot use mapcount: can't collapse if there's a gup pin */ if (page_count(page) != 1) goto out_unmap; - if (pte_young(pteval)) + if (pte_young(pteval) || PageReferenced(page) || + mmu_notifier_test_young(vma->vm_mm, address)) referenced = 1; } if (referenced) @@ -1951,8 +2024,9 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, break; } - if (!(vma->vm_flags & VM_HUGEPAGE) && - !khugepaged_always()) { + if ((!(vma->vm_flags & VM_HUGEPAGE) && + !khugepaged_always()) || + (vma->vm_flags & VM_NOHUGEPAGE)) { progress++; continue; } @@ -2067,6 +2141,9 @@ static void khugepaged_do_scan(struct page **hpage) break; #endif + if (unlikely(kthread_should_stop() || freezing(current))) + break; + spin_lock(&khugepaged_mm_lock); if (!khugepaged_scan.mm_slot) pass_through_head++; @@ -2129,6 +2206,9 @@ static void khugepaged_loop(void) if (hpage) put_page(hpage); #endif + try_to_freeze(); + if (unlikely(kthread_should_stop())) + break; if (khugepaged_has_work()) { DEFINE_WAIT(wait); if (!khugepaged_scan_sleep_millisecs) @@ -2139,8 +2219,8 @@ static void khugepaged_loop(void) khugepaged_scan_sleep_millisecs)); remove_wait_queue(&khugepaged_wait, &wait); } else if (khugepaged_enabled()) - wait_event_interruptible(khugepaged_wait, - khugepaged_wait_event()); + wait_event_freezable(khugepaged_wait, + khugepaged_wait_event()); } } @@ -2148,6 +2228,7 @@ static int khugepaged(void *none) { struct mm_slot *mm_slot; + set_freezable(); set_user_nice(current, 19); /* serialize with start_khugepaged() */ @@ -2162,6 +2243,8 @@ static int khugepaged(void *none) mutex_lock(&khugepaged_mutex); if (!khugepaged_enabled()) break; + if (unlikely(kthread_should_stop())) + break; } spin_lock(&khugepaged_mm_lock); @@ -2196,3 +2279,71 @@ void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd) put_page(page); BUG_ON(pmd_trans_huge(*pmd)); } + +static void split_huge_page_address(struct mm_struct *mm, + unsigned long address) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + VM_BUG_ON(!(address & ~HPAGE_PMD_MASK)); + + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + return; + + pud = pud_offset(pgd, address); + if (!pud_present(*pud)) + return; + + pmd = pmd_offset(pud, address); + if (!pmd_present(*pmd)) + return; + /* + * Caller holds the mmap_sem write mode, so a huge pmd cannot + * materialize from under us. + */ + split_huge_page_pmd(mm, pmd); +} + +void __vma_adjust_trans_huge(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + long adjust_next) +{ + /* + * If the new start address isn't hpage aligned and it could + * previously contain an hugepage: check if we need to split + * an huge pmd. + */ + if (start & ~HPAGE_PMD_MASK && + (start & HPAGE_PMD_MASK) >= vma->vm_start && + (start & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end) + split_huge_page_address(vma->vm_mm, start); + + /* + * If the new end address isn't hpage aligned and it could + * previously contain an hugepage: check if we need to split + * an huge pmd. + */ + if (end & ~HPAGE_PMD_MASK && + (end & HPAGE_PMD_MASK) >= vma->vm_start && + (end & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end) + split_huge_page_address(vma->vm_mm, end); + + /* + * If we're also updating the vma->vm_next->vm_start, if the new + * vm_next->vm_start isn't page aligned and it could previously + * contain an hugepage: check if we need to split an huge pmd. + */ + if (adjust_next > 0) { + struct vm_area_struct *next = vma->vm_next; + unsigned long nstart = next->vm_start; + nstart += adjust_next << PAGE_SHIFT; + if (nstart & ~HPAGE_PMD_MASK && + (nstart & HPAGE_PMD_MASK) >= next->vm_start && + (nstart & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= next->vm_end) + split_huge_page_address(next->vm_mm, nstart); + } +}