netfilter: ipv4, defrag: switch hook PFs to nfproto

[~andy/linux] / mm / memcontrol.c
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index 75198dac3fe8877a234d8a86c4723dc7bc4578c0..ac35bccadb7b9f53606d445a961e442e891aa94a 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -258,8 +258,8 @@ struct mem_cgroup {
                  */
                 struct rcu_head rcu_freeing;
                 /*
-                * But when using vfree(), that cannot be done at
-                * interrupt time, so we must then queue the work.
+                * We also need some space for a worker in deferred freeing.
+                * By the time we call it, rcu_freeing is no longer in use.
                  */
                 struct work_struct work_freeing;
         };
@@ -417,6 +417,7 @@ void sock_update_memcg(struct sock *sk)
  {
         if (mem_cgroup_sockets_enabled) {
                 struct mem_cgroup *memcg;
+               struct cg_proto *cg_proto;
  
                 BUG_ON(!sk->sk_prot->proto_cgroup);
  
@@ -436,9 +437,10 @@ void sock_update_memcg(struct sock *sk)
  
                 rcu_read_lock();
                 memcg = mem_cgroup_from_task(current);
-               if (!mem_cgroup_is_root(memcg)) {
+               cg_proto = sk->sk_prot->proto_cgroup(memcg);
+               if (!mem_cgroup_is_root(memcg) && memcg_proto_active(cg_proto)) {
                         mem_cgroup_get(memcg);
-                       sk->sk_cgrp = sk->sk_prot->proto_cgroup(memcg);
+                       sk->sk_cgrp = cg_proto;
                 }
                 rcu_read_unlock();
         }
@@ -467,6 +469,19 @@ EXPORT_SYMBOL(tcp_proto_cgroup);
  #endif /* CONFIG_INET */
  #endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
  
+#if defined(CONFIG_INET) && defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM)
+static void disarm_sock_keys(struct mem_cgroup *memcg)
+{
+       if (!memcg_proto_activated(&memcg->tcp_mem.cg_proto))
+               return;
+       static_key_slow_dec(&memcg_socket_limit_enabled);
+}
+#else
+static void disarm_sock_keys(struct mem_cgroup *memcg)
+{
+}
+#endif
+
  static void drain_all_stock_async(struct mem_cgroup *memcg);
  
  static struct mem_cgroup_per_zone *
@@ -1035,7 +1050,7 @@ EXPORT_SYMBOL(mem_cgroup_count_vm_event);
  /**
   * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
   * @zone: zone of the wanted lruvec
- * @mem: memcg of the wanted lruvec
+ * @memcg: memcg of the wanted lruvec
   *
   * Returns the lru list vector holding pages for the given @zone and
   * @mem.  This can be the global zone lruvec, if the memory controller
@@ -1068,19 +1083,11 @@ struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
   */
  
  /**
- * mem_cgroup_lru_add_list - account for adding an lru page and return lruvec
- * @zone: zone of the page
+ * mem_cgroup_page_lruvec - return lruvec for adding an lru page
   * @page: the page
- * @lru: current lru
- *
- * This function accounts for @page being added to @lru, and returns
- * the lruvec for the given @zone and the memcg @page is charged to.
- *
- * The callsite is then responsible for physically linking the page to
- * the returned lruvec->lists[@lru].
+ * @zone: zone of the page
   */
-struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
-                                      enum lru_list lru)
+struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
  {
         struct mem_cgroup_per_zone *mz;
         struct mem_cgroup *memcg;
@@ -1093,7 +1100,7 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
         memcg = pc->mem_cgroup;
  
         /*
-        * Surreptitiously switch any uncharged page to root:
+        * Surreptitiously switch any uncharged offlist page to root:
          * an uncharged page off lru does nothing to secure
          * its former mem_cgroup from sudden removal.
          *
@@ -1101,65 +1108,35 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
          * under page_cgroup lock: between them, they make all uses
          * of pc->mem_cgroup safe.
          */
-       if (!PageCgroupUsed(pc) && memcg != root_mem_cgroup)
+       if (!PageLRU(page) && !PageCgroupUsed(pc) && memcg != root_mem_cgroup)
                 pc->mem_cgroup = memcg = root_mem_cgroup;
  
         mz = page_cgroup_zoneinfo(memcg, page);
-       /* compound_order() is stabilized through lru_lock */
-       mz->lru_size[lru] += 1 << compound_order(page);
         return &mz->lruvec;
  }
  
  /**
- * mem_cgroup_lru_del_list - account for removing an lru page
- * @page: the page
- * @lru: target lru
+ * mem_cgroup_update_lru_size - account for adding or removing an lru page
+ * @lruvec: mem_cgroup per zone lru vector
+ * @lru: index of lru list the page is sitting on
+ * @nr_pages: positive when adding or negative when removing
   *
- * This function accounts for @page being removed from @lru.
- *
- * The callsite is then responsible for physically unlinking
- * @page->lru.
+ * This function must be called when a page is added to or removed from an
+ * lru list.
   */
-void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
+void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
+                               int nr_pages)
  {
         struct mem_cgroup_per_zone *mz;
-       struct mem_cgroup *memcg;
-       struct page_cgroup *pc;
+       unsigned long *lru_size;
  
         if (mem_cgroup_disabled())
                 return;
  
-       pc = lookup_page_cgroup(page);
-       memcg = pc->mem_cgroup;
-       VM_BUG_ON(!memcg);
-       mz = page_cgroup_zoneinfo(memcg, page);
-       /* huge page split is done under lru_lock. so, we have no races. */
-       VM_BUG_ON(mz->lru_size[lru] < (1 << compound_order(page)));
-       mz->lru_size[lru] -= 1 << compound_order(page);
-}
-
-/**
- * mem_cgroup_lru_move_lists - account for moving a page between lrus
- * @zone: zone of the page
- * @page: the page
- * @from: current lru
- * @to: target lru
- *
- * This function accounts for @page being moved between the lrus @from
- * and @to, and returns the lruvec for the given @zone and the memcg
- * @page is charged to.
- *
- * The callsite is then responsible for physically relinking
- * @page->lru to the returned lruvec->lists[@to].
- */
-struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
-                                        struct page *page,
-                                        enum lru_list from,
-                                        enum lru_list to)
-{
-       /* XXX: Optimize this, especially for @from == @to */
-       mem_cgroup_lru_del_list(page, from);
-       return mem_cgroup_lru_add_list(zone, page, to);
+       mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
+       lru_size = mz->lru_size + lru;
+       *lru_size += nr_pages;
+       VM_BUG_ON((long)(*lru_size) < 0);
  }
  
  /*
@@ -1252,24 +1229,6 @@ int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec)
         return (active > inactive);
  }
  
-struct zone_reclaim_stat *
-mem_cgroup_get_reclaim_stat_from_page(struct page *page)
-{
-       struct page_cgroup *pc;
-       struct mem_cgroup_per_zone *mz;
-
-       if (mem_cgroup_disabled())
-               return NULL;
-
-       pc = lookup_page_cgroup(page);
-       if (!PageCgroupUsed(pc))
-               return NULL;
-       /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
-       smp_rmb();
-       mz = page_cgroup_zoneinfo(pc->mem_cgroup, page);
-       return &mz->lruvec.reclaim_stat;
-}
-
  #define mem_cgroup_from_res_counter(counter, member)   \
         container_of(counter, struct mem_cgroup, member)
  
@@ -2509,6 +2468,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
  {
         struct page_cgroup *pc = lookup_page_cgroup(page);
         struct zone *uninitialized_var(zone);
+       struct lruvec *lruvec;
         bool was_on_lru = false;
         bool anon;
  
@@ -2531,8 +2491,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
                 zone = page_zone(page);
                 spin_lock_irq(&zone->lru_lock);
                 if (PageLRU(page)) {
+                       lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
                         ClearPageLRU(page);
-                       del_page_from_lru_list(zone, page, page_lru(page));
+                       del_page_from_lru_list(page, lruvec, page_lru(page));
                         was_on_lru = true;
                 }
         }
@@ -2550,9 +2511,10 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
  
         if (lrucare) {
                 if (was_on_lru) {
+                       lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
                         VM_BUG_ON(PageLRU(page));
                         SetPageLRU(page);
-                       add_page_to_lru_list(zone, page, page_lru(page));
+                       add_page_to_lru_list(page, lruvec, page_lru(page));
                 }
                 spin_unlock_irq(&zone->lru_lock);
         }
@@ -4755,23 +4717,40 @@ out_free:
  }
  
  /*
- * Helpers for freeing a vzalloc()ed mem_cgroup by RCU,
+ * Helpers for freeing a kmalloc()ed/vzalloc()ed mem_cgroup by RCU,
   * but in process context.  The work_freeing structure is overlaid
   * on the rcu_freeing structure, which itself is overlaid on memsw.
   */
-static void vfree_work(struct work_struct *work)
+static void free_work(struct work_struct *work)
  {
         struct mem_cgroup *memcg;
+       int size = sizeof(struct mem_cgroup);
  
         memcg = container_of(work, struct mem_cgroup, work_freeing);
-       vfree(memcg);
+       /*
+        * We need to make sure that (at least for now) the jump label
+        * destruction code runs outside of the cgroup lock. This is because
+        * get_online_cpus(), which is called from the static_branch update,
+        * can't be called inside the cgroup_lock. cpusets are the ones
+        * enforcing this dependency, so if they ever change, we may change too.
+        *
+        * schedule_work() will guarantee this happens. Be careful if you need
+        * to move this code around, and make sure it is outside
+        * the cgroup_lock.
+        */
+       disarm_sock_keys(memcg);
+       if (size < PAGE_SIZE)
+               kfree(memcg);
+       else
+               vfree(memcg);
  }
-static void vfree_rcu(struct rcu_head *rcu_head)
+
+static void free_rcu(struct rcu_head *rcu_head)
  {
         struct mem_cgroup *memcg;
  
         memcg = container_of(rcu_head, struct mem_cgroup, rcu_freeing);
-       INIT_WORK(&memcg->work_freeing, vfree_work);
+       INIT_WORK(&memcg->work_freeing, free_work);
         schedule_work(&memcg->work_freeing);
  }
  
@@ -4797,10 +4776,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
                 free_mem_cgroup_per_zone_info(memcg, node);
  
         free_percpu(memcg->stat);
-       if (sizeof(struct mem_cgroup) < PAGE_SIZE)
-               kfree_rcu(memcg, rcu_freeing);
-       else
-               call_rcu(&memcg->rcu_freeing, vfree_rcu);
+       call_rcu(&memcg->rcu_freeing, free_rcu);
  }
  
  static void mem_cgroup_get(struct mem_cgroup *memcg)