mm: vmscan: have kswapd writeback pages based on dirty pages encountered, not priority

author Mel Gorman <mgorman@suse.de>

Wed, 3 Jul 2013 22:01:50 +0000 (15:01 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 3 Jul 2013 23:07:28 +0000 (16:07 -0700)
author Mel Gorman <mgorman@suse.de>
Wed, 3 Jul 2013 22:01:50 +0000 (15:01 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 3 Jul 2013 23:07:28 +0000 (16:07 -0700)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h

index 5c76737d836b1edc218bfba700fb26c09e62408a..2aaf72f7e345f0841261db1a25a8c91e529f6f07 100644 (file)
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -495,6 +495,10 @@ typedef enum {
         ZONE_CONGESTED,                 /* zone has many dirty pages backed by
                                          * a congested BDI
                                          */
+       ZONE_TAIL_LRU_DIRTY,            /* reclaim scanning has recently found
+                                        * many dirty file pages at the tail
+                                        * of the LRU.
+                                        */
  } zone_flags_t;
  
  static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
@@ -517,6 +521,11 @@ static inline int zone_is_reclaim_congested(const struct zone *zone)
         return test_bit(ZONE_CONGESTED, &zone->flags);
  }
  
+static inline int zone_is_reclaim_dirty(const struct zone *zone)
+{
+       return test_bit(ZONE_TAIL_LRU_DIRTY, &zone->flags);
+}
+
  static inline int zone_is_reclaim_locked(const struct zone *zone)
  {
         return test_bit(ZONE_RECLAIM_LOCKED, &zone->flags);
diff --git a/mm/vmscan.c b/mm/vmscan.c

index 1505c573719d9c4722eb6c52e915dfdb5e8c8353..d6c916d808ba2248e112a3dc3767647abf857fc3 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -676,13 +676,14 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                                       struct zone *zone,
                                       struct scan_control *sc,
                                       enum ttu_flags ttu_flags,
-                                     unsigned long *ret_nr_dirty,
+                                     unsigned long *ret_nr_unqueued_dirty,
                                       unsigned long *ret_nr_writeback,
                                       bool force_reclaim)
  {
         LIST_HEAD(ret_pages);
         LIST_HEAD(free_pages);
         int pgactivate = 0;
+       unsigned long nr_unqueued_dirty = 0;
         unsigned long nr_dirty = 0;
         unsigned long nr_congested = 0;
         unsigned long nr_reclaimed = 0;
@@ -808,14 +809,17 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                 if (PageDirty(page)) {
                         nr_dirty++;
  
+                       if (!PageWriteback(page))
+                               nr_unqueued_dirty++;
+
                         /*
                          * Only kswapd can writeback filesystem pages to
-                        * avoid risk of stack overflow but do not writeback
-                        * unless under significant pressure.
+                        * avoid risk of stack overflow but only writeback
+                        * if many dirty pages have been encountered.
                          */
                         if (page_is_file_cache(page) &&
                                         (!current_is_kswapd() ||
-                                        sc->priority >= DEF_PRIORITY - 2)) {
+                                        !zone_is_reclaim_dirty(zone))) {
                                 /*
                                  * Immediately reclaim when written back.
                                  * Similar in principle to deactivate_page()
@@ -960,7 +964,7 @@ keep:
         list_splice(&ret_pages, page_list);
         count_vm_events(PGACTIVATE, pgactivate);
         mem_cgroup_uncharge_end();
-       *ret_nr_dirty += nr_dirty;
+       *ret_nr_unqueued_dirty += nr_unqueued_dirty;
         *ret_nr_writeback += nr_writeback;
         return nr_reclaimed;
  }
@@ -1373,6 +1377,15 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
                         (nr_taken >> (DEF_PRIORITY - sc->priority)))
                 wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
  
+       /*
+        * Similarly, if many dirty pages are encountered that are not
+        * currently being written then flag that kswapd should start
+        * writing back pages.
+        */
+       if (global_reclaim(sc) && nr_dirty &&
+                       nr_dirty >= (nr_taken >> (DEF_PRIORITY - sc->priority)))
+               zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY);
+
         trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id,
                 zone_idx(zone),
                 nr_scanned, nr_reclaimed,
@@ -2769,8 +2782,12 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                                 end_zone = i;
                                 break;
                         } else {
-                               /* If balanced, clear the congested flag */
+                               /*
+                                * If balanced, clear the dirty and congested
+                                * flags
+                                */
                                 zone_clear_flag(zone, ZONE_CONGESTED);
+                               zone_clear_flag(zone, ZONE_TAIL_LRU_DIRTY);
                         }
                 }
  
@@ -2888,8 +2905,10 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                                  * possible there are dirty pages backed by
                                  * congested BDIs but as pressure is relieved,
                                  * speculatively avoid congestion waits
+                                * or writing pages from kswapd context.
                                  */
                                 zone_clear_flag(zone, ZONE_CONGESTED);
+                               zone_clear_flag(zone, ZONE_TAIL_LRU_DIRTY);
                 }
  
                 /*
author	Mel Gorman <mgorman@suse.de>
	Wed, 3 Jul 2013 22:01:50 +0000 (15:01 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 3 Jul 2013 23:07:28 +0000 (16:07 -0700)
include/linux/mmzone.h		patch \| blob \| history
mm/vmscan.c		patch \| blob \| history