Pileus Git - ~andy/linux/blobdiff - mm/vmscan.c
include/linux/skbuff.h: move CONFIG_XFRM check inside the skb_sec_path()
[~andy/linux] / mm / vmscan.c
index fe715daeb8bc8288b0991be836a3dbcd60cebc85..53f2f82f83ae0d16bf19646cdb5b3bce5fc4e4cf 100644 (file)
@@ -48,6 +48,7 @@
 #include <asm/div64.h>
 
 #include <linux/swapops.h>
+#include <linux/balloon_compaction.h>
 
 #include "internal.h"
 
@@ -174,14 +175,31 @@ static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 }
 
 /*
- * Add a shrinker callback to be called from the vm
+ * Add a shrinker callback to be called from the vm. Returns 0 or -ENOMEM.
  */
-void register_shrinker(struct shrinker *shrinker)
+int register_shrinker(struct shrinker *shrinker)
 {
-       atomic_long_set(&shrinker->nr_in_batch, 0);
+       size_t size = sizeof(*shrinker->nr_deferred);
+
+       /*
+        * If we only have one possible node in the system anyway, save
+        * ourselves the trouble and disable NUMA aware behavior. This way we
+        * will save memory and some small loop time later.
+        */
+       if (nr_node_ids == 1)
+               shrinker->flags &= ~SHRINKER_NUMA_AWARE;
+
+       if (shrinker->flags & SHRINKER_NUMA_AWARE)
+               size *= nr_node_ids;    /* one counter per node id */
+
+       shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
+       if (!shrinker->nr_deferred)
+               return -ENOMEM;
+
        down_write(&shrinker_rwsem);
        list_add_tail(&shrinker->list, &shrinker_list);
        up_write(&shrinker_rwsem);
+       return 0;
 }
 EXPORT_SYMBOL(register_shrinker);
 
@@ -196,15 +214,102 @@ void unregister_shrinker(struct shrinker *shrinker)
 }
 EXPORT_SYMBOL(unregister_shrinker);
 
-static inline int do_shrinker_shrink(struct shrinker *shrinker,
-                                    struct shrink_control *sc,
-                                    unsigned long nr_to_scan)
-{
-       sc->nr_to_scan = nr_to_scan;
-       return (*shrinker->shrink)(shrinker, sc);
+#define SHRINK_BATCH 128
+/* Shrink @shrinker's cache for node shrinkctl->nid; returns objects freed. */
+static unsigned long
+shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
+                unsigned long nr_pages_scanned, unsigned long lru_pages)
+{
+       unsigned long freed = 0;
+       unsigned long long delta;
+       long total_scan;
+       long max_pass;  /* count_objects() result */
+       long nr;        /* scan count deferred by earlier calls */
+       long new_nr;    /* deferred count left behind, for tracing */
+       int nid = shrinkctl->nid;
+       long batch_size = shrinker->batch ? shrinker->batch
+                                         : SHRINK_BATCH;
+
+       max_pass = shrinker->count_objects(shrinker, shrinkctl);
+       if (max_pass == 0)
+               return 0;
+
+       /*
+        * copy the current shrinker scan count into a local variable
+        * and zero it so that other concurrent shrinker invocations
+        * don't also do this scanning work.
+        */
+       nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
+
+       total_scan = nr;
+       delta = (4 * nr_pages_scanned) / shrinker->seeks;
+       delta *= max_pass;
+       do_div(delta, lru_pages + 1);
+       total_scan += delta;
+       if (total_scan < 0) {
+               printk(KERN_ERR
+               "shrink_slab: %pF negative objects to delete nr=%ld\n",
+                      shrinker->scan_objects, total_scan);
+               total_scan = max_pass;
+       }
+
+       /*
+        * We need to avoid excessive windup on filesystem shrinkers
+        * due to large numbers of GFP_NOFS allocations causing the
+        * shrinkers to return SHRINK_STOP all the time. This results in a
+        * large nr being built up so when a shrink that can do some work
+        * comes along it empties the entire cache due to nr >>>
+        * max_pass.  This is bad for sustaining a working set in
+        * memory.
+        *
+        * Hence only allow the shrinker to scan the entire cache when
+        * a large delta change is calculated directly.
+        */
+       if (delta < max_pass / 4)
+               total_scan = min(total_scan, max_pass / 2);
+
+       /*
+        * Avoid risking looping forever due to too large nr value:
+        * never try to free more than twice the estimated number of
+        * freeable entries.
+        */
+       if (total_scan > max_pass * 2)
+               total_scan = max_pass * 2;
+
+       trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
+                               nr_pages_scanned, lru_pages,
+                               max_pass, delta, total_scan);
+
+       while (total_scan >= batch_size) {
+               unsigned long ret;
+
+               shrinkctl->nr_to_scan = batch_size;
+               ret = shrinker->scan_objects(shrinker, shrinkctl);
+               if (ret == SHRINK_STOP)
+                       break;
+               freed += ret;
+
+               count_vm_events(SLABS_SCANNED, batch_size);
+               total_scan -= batch_size;
+
+               cond_resched();
+       }
+
+       /*
+        * move the unused scan count back into the shrinker in a
+        * manner that handles concurrent updates. If we exhausted the
+        * scan, there is no need to do an update.
+        */
+       if (total_scan > 0)
+               new_nr = atomic_long_add_return(total_scan,
+                                               &shrinker->nr_deferred[nid]);
+       else
+               new_nr = atomic_long_read(&shrinker->nr_deferred[nid]);
+
+       trace_mm_shrink_slab_end(shrinker, freed, nr, new_nr);
+       return freed;
 }
 
-#define SHRINK_BATCH 128
 /*
  * Call the shrink functions to age shrinkable caches
  *
@@ -224,115 +329,45 @@ static inline int do_shrinker_shrink(struct shrinker *shrinker,
  *
  * Returns the number of slab objects which we shrunk.
  */
-unsigned long shrink_slab(struct shrink_control *shrink,
+unsigned long shrink_slab(struct shrink_control *shrinkctl,
                          unsigned long nr_pages_scanned,
                          unsigned long lru_pages)
 {
        struct shrinker *shrinker;
-       unsigned long ret = 0;
+       unsigned long freed = 0;
 
        if (nr_pages_scanned == 0)
                nr_pages_scanned = SWAP_CLUSTER_MAX;
 
        if (!down_read_trylock(&shrinker_rwsem)) {
-               /* Assume we'll be able to shrink next time */
-               ret = 1;
+               /*
+                * If we would return 0, our callers would understand that we
+                * have nothing else to shrink and give up trying. By returning
+                * 1 we keep it going and assume we'll be able to shrink next
+                * time.
+                */
+               freed = 1;
                goto out;
        }
 
        list_for_each_entry(shrinker, &shrinker_list, list) {
-               unsigned long long delta;
-               long total_scan;
-               long max_pass;
-               int shrink_ret = 0;
-               long nr;
-               long new_nr;
-               long batch_size = shrinker->batch ? shrinker->batch
-                                                 : SHRINK_BATCH;
-
-               max_pass = do_shrinker_shrink(shrinker, shrink, 0);
-               if (max_pass <= 0)
-                       continue;
-
-               /*
-                * copy the current shrinker scan count into a local variable
-                * and zero it so that other concurrent shrinker invocations
-                * don't also do this scanning work.
-                */
-               nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
-
-               total_scan = nr;
-               delta = (4 * nr_pages_scanned) / shrinker->seeks;
-               delta *= max_pass;
-               do_div(delta, lru_pages + 1);
-               total_scan += delta;
-               if (total_scan < 0) {
-                       printk(KERN_ERR "shrink_slab: %pF negative objects to "
-                              "delete nr=%ld\n",
-                              shrinker->shrink, total_scan);
-                       total_scan = max_pass;
-               }
-
-               /*
-                * We need to avoid excessive windup on filesystem shrinkers
-                * due to large numbers of GFP_NOFS allocations causing the
-                * shrinkers to return -1 all the time. This results in a large
-                * nr being built up so when a shrink that can do some work
-                * comes along it empties the entire cache due to nr >>>
-                * max_pass.  This is bad for sustaining a working set in
-                * memory.
-                *
-                * Hence only allow the shrinker to scan the entire cache when
-                * a large delta change is calculated directly.
-                */
-               if (delta < max_pass / 4)
-                       total_scan = min(total_scan, max_pass / 2);
-
-               /*
-                * Avoid risking looping forever due to too large nr value:
-                * never try to free more than twice the estimate number of
-                * freeable entries.
-                */
-               if (total_scan > max_pass * 2)
-                       total_scan = max_pass * 2;
-
-               trace_mm_shrink_slab_start(shrinker, shrink, nr,
-                                       nr_pages_scanned, lru_pages,
-                                       max_pass, delta, total_scan);
-
-               while (total_scan >= batch_size) {
-                       int nr_before;
+               /* NUMA-unaware: call once with nid 0, ignoring nodes_to_scan */
+               if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) {
+                       shrinkctl->nid = 0;
+                       freed += shrink_slab_node(shrinkctl, shrinker,
+                                       nr_pages_scanned, lru_pages);
+                       continue;
+               }
-
-                       nr_before = do_shrinker_shrink(shrinker, shrink, 0);
-                       shrink_ret = do_shrinker_shrink(shrinker, shrink,
-                                                       batch_size);
-                       if (shrink_ret == -1)
-                               break;
-                       if (shrink_ret < nr_before)
-                               ret += nr_before - shrink_ret;
-                       count_vm_events(SLABS_SCANNED, batch_size);
-                       total_scan -= batch_size;
-
-                       cond_resched();
-               }
-
-               /*
-                * move the unused scan count back into the shrinker in a
-                * manner that handles concurrent updates. If we exhausted the
-                * scan, there is no need to do an update.
-                */
-               if (total_scan > 0)
-                       new_nr = atomic_long_add_return(total_scan,
-                                       &shrinker->nr_in_batch);
-               else
-                       new_nr = atomic_long_read(&shrinker->nr_in_batch);
-
-               trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
+               for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) {
+                       if (node_online(shrinkctl->nid))
+                               freed += shrink_slab_node(shrinkctl, shrinker,
+                                               nr_pages_scanned, lru_pages);
+               }
        }
        up_read(&shrinker_rwsem);
 out:
        cond_resched();
-       return ret;
+       return freed;
 }
 
 static inline int is_page_cache_freeable(struct page *page)
@@ -1079,7 +1114,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
        LIST_HEAD(clean_pages);
 
        list_for_each_entry_safe(page, next, page_list, lru) {
-               if (page_is_file_cache(page) && !PageDirty(page)) {
+               if (page_is_file_cache(page) && !PageDirty(page) &&
+                   !isolated_balloon_page(page)) {
                        ClearPageActive(page);
                        list_move(&page->lru, &clean_pages);
                }
@@ -2368,12 +2404,16 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                 */
                if (global_reclaim(sc)) {
                        unsigned long lru_pages = 0;
+
+                       nodes_clear(shrink->nodes_to_scan);
                        for_each_zone_zonelist(zone, z, zonelist,
                                        gfp_zone(sc->gfp_mask)) {
                                if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
                                        continue;
 
                                lru_pages += zone_reclaimable_pages(zone);
+                               node_set(zone_to_nid(zone),
+                                        shrink->nodes_to_scan);
                        }
 
                        shrink_slab(shrink, sc->nr_scanned, lru_pages);
@@ -2829,6 +2869,8 @@ static bool kswapd_shrink_zone(struct zone *zone,
                return true;
 
        shrink_zone(zone, sc);
+       nodes_clear(shrink.nodes_to_scan);
+       node_set(zone_to_nid(zone), shrink.nodes_to_scan);
 
        reclaim_state->reclaimed_slab = 0;
        shrink_slab(&shrink, sc->nr_scanned, lru_pages);
@@ -3520,10 +3562,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
                 * number of slab pages and shake the slab until it is reduced
                 * by the same nr_pages that we used for reclaiming unmapped
                 * pages.
-                *
-                * Note that shrink_slab will free memory on all zones and may
-                * take a long time.
                 */
+               nodes_clear(shrink.nodes_to_scan);
+               node_set(zone_to_nid(zone), shrink.nodes_to_scan);
                for (;;) {
                        unsigned long lru_pages = zone_reclaimable_pages(zone);