Merge branch 'for-2.6.37' into for-2.6.38

[~andy/linux] / mm / memory.c
diff --git a/mm/memory.c b/mm/memory.c

index bde42c6d3633f15cfe51b1eae8e3e56833e86955..0e18b4d649ec82abc83c208e5f9dce9cbb2cf905 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -307,7 +307,6 @@ void free_pgd_range(struct mmu_gather *tlb,
  {
         pgd_t *pgd;
         unsigned long next;
-       unsigned long start;
  
         /*
          * The next few lines have given us lots of grief...
@@ -351,7 +350,6 @@ void free_pgd_range(struct mmu_gather *tlb,
         if (addr > end - 1)
                 return;
  
-       start = addr;
         pgd = pgd_offset(tlb->mm, addr);
         do {
                 next = pgd_addr_end(addr, end);
@@ -2008,11 +2006,10 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
  {
         pgd_t *pgd;
         unsigned long next;
-       unsigned long start = addr, end = addr + size;
+       unsigned long end = addr + size;
         int err;
  
         BUG_ON(addr >= end);
-       mmu_notifier_invalidate_range_start(mm, start, end);
         pgd = pgd_offset(mm, addr);
         do {
                 next = pgd_addr_end(addr, end);
@@ -2020,7 +2017,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
                 if (err)
                         break;
         } while (pgd++, addr = next, addr != end);
-       mmu_notifier_invalidate_range_end(mm, start, end);
+
         return err;
  }
  EXPORT_SYMBOL_GPL(apply_to_page_range);
@@ -2626,10 +2623,11 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 unsigned int flags, pte_t orig_pte)
  {
         spinlock_t *ptl;
-       struct page *page;
+       struct page *page, *swapcache = NULL;
         swp_entry_t entry;
         pte_t pte;
         struct mem_cgroup *ptr = NULL;
+       int exclusive = 0;
         int ret = 0;
  
         if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
@@ -2681,10 +2679,25 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
         lock_page(page);
         delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
  
-       page = ksm_might_need_to_copy(page, vma, address);
-       if (!page) {
-               ret = VM_FAULT_OOM;
-               goto out;
+       /*
+        * Make sure try_to_free_swap or reuse_swap_page or swapoff did not
+        * release the swapcache from under us.  The page pin, and pte_same
+        * test below, are not enough to exclude that.  Even if it is still
+        * swapcache, we need to check that the page's swap has not changed.
+        */
+       if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val))
+               goto out_page;
+
+       if (ksm_might_need_to_copy(page, vma, address)) {
+               swapcache = page;
+               page = ksm_does_need_to_copy(page, vma, address);
+
+               if (unlikely(!page)) {
+                       ret = VM_FAULT_OOM;
+                       page = swapcache;
+                       swapcache = NULL;
+                       goto out_page;
+               }
         }
  
         if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
@@ -2724,10 +2737,12 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
         if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
                 pte = maybe_mkwrite(pte_mkdirty(pte), vma);
                 flags &= ~FAULT_FLAG_WRITE;
+               ret |= VM_FAULT_WRITE;
+               exclusive = 1;
         }
         flush_icache_page(vma, page);
         set_pte_at(mm, address, page_table, pte);
-       page_add_anon_rmap(page, vma, address);
+       do_page_add_anon_rmap(page, vma, address, exclusive);
         /* It's better to call commit-charge after rmap is established */
         mem_cgroup_commit_charge_swapin(page, ptr);
  
@@ -2735,6 +2750,18 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
         if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
                 try_to_free_swap(page);
         unlock_page(page);
+       if (swapcache) {
+               /*
+                * Hold the lock to prevent the swap entry from being
+                * reused until we take the PT lock for the pte_same()
+                * check (to avoid false positives from pte_same). For
+                * further safety, release the lock only after the
+                * swap_free, so that the swap count won't change under
+                * a parallel locked swapcache.
+                */
+               unlock_page(swapcache);
+               page_cache_release(swapcache);
+       }
  
         if (flags & FAULT_FLAG_WRITE) {
                 ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte);
@@ -2756,9 +2783,47 @@ out_page:
         unlock_page(page);
  out_release:
         page_cache_release(page);
+       if (swapcache) {
+               unlock_page(swapcache);
+               page_cache_release(swapcache);
+       }
         return ret;
  }
  
+/*
+ * Single-page "expand_{down|up}wards()" for a fault on the edge page of a
+ * VM_GROWS{DOWN|UP} vma.  Returns -ENOMEM if the neighbouring vma abuts that
+ * edge without the same flag, else 0 (expand_*() results are not checked).
+ */
+static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
+{
+       address &= PAGE_MASK;
+       if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
+               struct vm_area_struct *prev = vma->vm_prev;
+
+               /*
+                * A mapping ending exactly at our start abuts us from below.
+                *
+                * Accept it only if it is VM_GROWSDOWN too (per the original
+                * note: the same stack mapping that has gotten split).
+                */
+               if (prev && prev->vm_end == address)
+                       return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
+
+               expand_stack(vma, address - PAGE_SIZE); /* NOTE(review): return value discarded - confirm intended */
+       }
+       if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
+               struct vm_area_struct *next = vma->vm_next;
+
+               /* Same check as VM_GROWSDOWN, for a vma abutting above */
+               if (next && next->vm_start == address + PAGE_SIZE)
+                       return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
+
+               expand_upwards(vma, address + PAGE_SIZE); /* NOTE(review): return value discarded - confirm intended */
+       }
+       return 0;
+}
+
  /*
   * We enter with non-exclusive mmap_sem (to exclude vma changes,
   * but allow concurrent faults), and pte mapped but not yet locked.
@@ -2772,19 +2837,23 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
         spinlock_t *ptl;
         pte_t entry;
  
+       pte_unmap(page_table);
+
+       /* Check if we need to add a guard page to the stack */
+       if (check_stack_guard_page(vma, address) < 0)
+               return VM_FAULT_SIGBUS;
+
+       /* Use the zero-page for reads */
         if (!(flags & FAULT_FLAG_WRITE)) {
                 entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
                                                 vma->vm_page_prot));
-               ptl = pte_lockptr(mm, pmd);
-               spin_lock(ptl);
+               page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
                 if (!pte_none(*page_table))
                         goto unlock;
                 goto setpte;
         }
  
         /* Allocate our own private page. */
-       pte_unmap(page_table);
-
         if (unlikely(anon_vma_prepare(vma)))
                 goto oom;
         page = alloc_zeroed_user_highpage_movable(vma, address);