cgroup: fix memory leak in cgroup_rm_cftypes()

[~andy/linux] / kernel / cgroup.c
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index 2a9926275f806f41e7c15b6eed584acaf8796bc0..1d4f471de8d54e508ff2fb9ca99d2be5924b5c03 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -63,9 +63,6 @@
  
  #include <linux/atomic.h>
  
-/* css deactivation bias, makes css->refcnt negative to deny new trygets */
-#define CSS_DEACT_BIAS         INT_MIN
-
  /*
   * cgroup_mutex is the master lock.  Any modification to cgroup or its
   * hierarchy must be performed while holding it.
@@ -189,9 +186,12 @@ struct cgroup_event {
  static LIST_HEAD(roots);
  static int root_count;
  
-static DEFINE_IDA(hierarchy_ida);
-static int next_hierarchy_id;
-static DEFINE_SPINLOCK(hierarchy_id_lock);
+/*
+ * Hierarchy ID allocation and mapping.  It follows the same exclusion
+ * rules as other root ops - both cgroup_mutex and cgroup_root_mutex for
+ * writes, either for reads.
+ */
+static DEFINE_IDR(cgroup_hierarchy_idr);
  
  /* dummytop is a shorthand for the dummy hierarchy's top cgroup */
  #define dummytop (&rootnode.top_cgroup)
@@ -205,27 +205,15 @@ static struct cgroup_name root_cgroup_name = { .name = "/" };
   */
  static int need_forkexit_callback __read_mostly;
  
+static void cgroup_offline_fn(struct work_struct *work);
  static int cgroup_destroy_locked(struct cgroup *cgrp);
  static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
                               struct cftype cfts[], bool is_add);
  
-static int css_unbias_refcnt(int refcnt)
-{
-       return refcnt >= 0 ? refcnt : refcnt - CSS_DEACT_BIAS;
-}
-
-/* the current nr of refs, always >= 0 whether @css is deactivated or not */
-static int css_refcnt(struct cgroup_subsys_state *css)
-{
-       int v = atomic_read(&css->refcnt);
-
-       return css_unbias_refcnt(v);
-}
-
  /* convenient tests for these bits */
-inline int cgroup_is_removed(const struct cgroup *cgrp)
+static inline bool cgroup_is_dead(const struct cgroup *cgrp)
  {
-       return test_bit(CGRP_REMOVED, &cgrp->flags);
+       return test_bit(CGRP_DEAD, &cgrp->flags);
  }
  
  /**
@@ -297,7 +285,7 @@ static inline struct cftype *__d_cft(struct dentry *dentry)
  static bool cgroup_lock_live_group(struct cgroup *cgrp)
  {
         mutex_lock(&cgroup_mutex);
-       if (cgroup_is_removed(cgrp)) {
+       if (cgroup_is_dead(cgrp)) {
                 mutex_unlock(&cgroup_mutex);
                 return false;
         }
@@ -312,20 +300,24 @@ static void cgroup_release_agent(struct work_struct *work);
  static DECLARE_WORK(release_agent_work, cgroup_release_agent);
  static void check_for_release(struct cgroup *cgrp);
  
-/* Link structure for associating css_set objects with cgroups */
-struct cg_cgroup_link {
-       /*
-        * List running through cg_cgroup_links associated with a
-        * cgroup, anchored on cgroup->css_sets
-        */
-       struct list_head cgrp_link_list;
-       struct cgroup *cgrp;
-       /*
-        * List running through cg_cgroup_links pointing at a
-        * single css_set object, anchored on css_set->cg_links
-        */
-       struct list_head cg_link_list;
-       struct css_set *cg;
+/*
+ * A cgroup can be associated with multiple css_sets as different tasks may
+ * belong to different cgroups on different hierarchies.  In the other
+ * direction, a css_set is naturally associated with multiple cgroups.
+ * This M:N relationship is represented by the following link structure
+ * which exists for each association and allows traversing the associations
+ * from both sides.
+ */
+struct cgrp_cset_link {
+       /* the cgroup and css_set this link associates */
+       struct cgroup           *cgrp;
+       struct css_set          *cset;
+
+       /* list of cgrp_cset_links anchored at cgrp->cset_links */
+       struct list_head        cset_link;
+
+       /* list of cgrp_cset_links anchored at css_set->cgrp_links */
+       struct list_head        cgrp_link;
  };
  
  /* The default css_set - used by init and its children prior to any
@@ -336,7 +328,7 @@ struct cg_cgroup_link {
   */
  
  static struct css_set init_css_set;
-static struct cg_cgroup_link init_css_set_link;
+static struct cgrp_cset_link init_cgrp_cset_link;
  
  static int cgroup_init_idr(struct cgroup_subsys *ss,
                            struct cgroup_subsys_state *css);
@@ -373,90 +365,83 @@ static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
   * compiled into their kernel but not actually in use */
  static int use_task_css_set_links __read_mostly;
  
-static void __put_css_set(struct css_set *cg, int taskexit)
+static void __put_css_set(struct css_set *cset, int taskexit)
  {
-       struct cg_cgroup_link *link;
-       struct cg_cgroup_link *saved_link;
+       struct cgrp_cset_link *link, *tmp_link;
+
         /*
          * Ensure that the refcount doesn't hit zero while any readers
          * can see it. Similar to atomic_dec_and_lock(), but for an
          * rwlock
          */
-       if (atomic_add_unless(&cg->refcount, -1, 1))
+       if (atomic_add_unless(&cset->refcount, -1, 1))
                 return;
         write_lock(&css_set_lock);
-       if (!atomic_dec_and_test(&cg->refcount)) {
+       if (!atomic_dec_and_test(&cset->refcount)) {
                 write_unlock(&css_set_lock);
                 return;
         }
  
         /* This css_set is dead. unlink it and release cgroup refcounts */
-       hash_del(&cg->hlist);
+       hash_del(&cset->hlist);
         css_set_count--;
  
-       list_for_each_entry_safe(link, saved_link, &cg->cg_links,
-                                cg_link_list) {
+       list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) {
                 struct cgroup *cgrp = link->cgrp;
-               list_del(&link->cg_link_list);
-               list_del(&link->cgrp_link_list);
  
-               /*
-                * We may not be holding cgroup_mutex, and if cgrp->count is
-                * dropped to 0 the cgroup can be destroyed at any time, hence
-                * rcu_read_lock is used to keep it alive.
-                */
-               rcu_read_lock();
-               if (atomic_dec_and_test(&cgrp->count) &&
-                   notify_on_release(cgrp)) {
+               list_del(&link->cset_link);
+               list_del(&link->cgrp_link);
+
+               /* @cgrp can't go away while we're holding css_set_lock */
+               if (list_empty(&cgrp->cset_links) && notify_on_release(cgrp)) {
                         if (taskexit)
                                 set_bit(CGRP_RELEASABLE, &cgrp->flags);
                         check_for_release(cgrp);
                 }
-               rcu_read_unlock();
  
                 kfree(link);
         }
  
         write_unlock(&css_set_lock);
-       kfree_rcu(cg, rcu_head);
+       kfree_rcu(cset, rcu_head);
  }
  
  /*
   * refcounted get/put for css_set objects
   */
-static inline void get_css_set(struct css_set *cg)
+static inline void get_css_set(struct css_set *cset)
  {
-       atomic_inc(&cg->refcount);
+       atomic_inc(&cset->refcount);
  }
  
-static inline void put_css_set(struct css_set *cg)
+static inline void put_css_set(struct css_set *cset)
  {
-       __put_css_set(cg, 0);
+       __put_css_set(cset, 0);
  }
  
-static inline void put_css_set_taskexit(struct css_set *cg)
+static inline void put_css_set_taskexit(struct css_set *cset)
  {
-       __put_css_set(cg, 1);
+       __put_css_set(cset, 1);
  }
  
  /*
   * compare_css_sets - helper function for find_existing_css_set().
- * @cg: candidate css_set being tested
- * @old_cg: existing css_set for a task
+ * @cset: candidate css_set being tested
+ * @old_cset: existing css_set for a task
   * @new_cgrp: cgroup that's being entered by the task
   * @template: desired set of css pointers in css_set (pre-calculated)
   *
   * Returns true if "cg" matches "old_cg" except for the hierarchy
   * which "new_cgrp" belongs to, for which it should match "new_cgrp".
   */
-static bool compare_css_sets(struct css_set *cg,
-                            struct css_set *old_cg,
+static bool compare_css_sets(struct css_set *cset,
+                            struct css_set *old_cset,
                              struct cgroup *new_cgrp,
                              struct cgroup_subsys_state *template[])
  {
         struct list_head *l1, *l2;
  
-       if (memcmp(template, cg->subsys, sizeof(cg->subsys))) {
+       if (memcmp(template, cset->subsys, sizeof(cset->subsys))) {
                 /* Not all subsystems matched */
                 return false;
         }
@@ -470,28 +455,28 @@ static bool compare_css_sets(struct css_set *cg,
          * candidates.
          */
  
-       l1 = &cg->cg_links;
-       l2 = &old_cg->cg_links;
+       l1 = &cset->cgrp_links;
+       l2 = &old_cset->cgrp_links;
         while (1) {
-               struct cg_cgroup_link *cgl1, *cgl2;
-               struct cgroup *cg1, *cg2;
+               struct cgrp_cset_link *link1, *link2;
+               struct cgroup *cgrp1, *cgrp2;
  
                 l1 = l1->next;
                 l2 = l2->next;
                 /* See if we reached the end - both lists are equal length. */
-               if (l1 == &cg->cg_links) {
-                       BUG_ON(l2 != &old_cg->cg_links);
+               if (l1 == &cset->cgrp_links) {
+                       BUG_ON(l2 != &old_cset->cgrp_links);
                         break;
                 } else {
-                       BUG_ON(l2 == &old_cg->cg_links);
+                       BUG_ON(l2 == &old_cset->cgrp_links);
                 }
                 /* Locate the cgroups associated with these links. */
-               cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list);
-               cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list);
-               cg1 = cgl1->cgrp;
-               cg2 = cgl2->cgrp;
+               link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link);
+               link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link);
+               cgrp1 = link1->cgrp;
+               cgrp2 = link2->cgrp;
                 /* Hierarchies should be linked in the same order. */
-               BUG_ON(cg1->root != cg2->root);
+               BUG_ON(cgrp1->root != cgrp2->root);
  
                 /*
                  * If this hierarchy is the hierarchy of the cgroup
@@ -500,11 +485,11 @@ static bool compare_css_sets(struct css_set *cg,
                  * hierarchy, then this css_set should point to the
                  * same cgroup as the old css_set.
                  */
-               if (cg1->root == new_cgrp->root) {
-                       if (cg1 != new_cgrp)
+               if (cgrp1->root == new_cgrp->root) {
+                       if (cgrp1 != new_cgrp)
                                 return false;
                 } else {
-                       if (cg1 != cg2)
+                       if (cgrp1 != cgrp2)
                                 return false;
                 }
         }
@@ -524,14 +509,13 @@ static bool compare_css_sets(struct css_set *cg,
   * template: location in which to build the desired set of subsystem
   * state objects for the new cgroup group
   */
-static struct css_set *find_existing_css_set(
-       struct css_set *oldcg,
-       struct cgroup *cgrp,
-       struct cgroup_subsys_state *template[])
+static struct css_set *find_existing_css_set(struct css_set *old_cset,
+                                       struct cgroup *cgrp,
+                                       struct cgroup_subsys_state *template[])
  {
         int i;
         struct cgroupfs_root *root = cgrp->root;
-       struct css_set *cg;
+       struct css_set *cset;
         unsigned long key;
  
         /*
@@ -548,78 +532,80 @@ static struct css_set *find_existing_css_set(
                 } else {
                         /* Subsystem is not in this hierarchy, so we
                          * don't want to change the subsystem state */
-                       template[i] = oldcg->subsys[i];
+                       template[i] = old_cset->subsys[i];
                 }
         }
  
         key = css_set_hash(template);
-       hash_for_each_possible(css_set_table, cg, hlist, key) {
-               if (!compare_css_sets(cg, oldcg, cgrp, template))
+       hash_for_each_possible(css_set_table, cset, hlist, key) {
+               if (!compare_css_sets(cset, old_cset, cgrp, template))
                         continue;
  
                 /* This css_set matches what we need */
-               return cg;
+               return cset;
         }
  
         /* No existing cgroup group matched */
         return NULL;
  }
  
-static void free_cg_links(struct list_head *tmp)
+static void free_cgrp_cset_links(struct list_head *links_to_free)
  {
-       struct cg_cgroup_link *link;
-       struct cg_cgroup_link *saved_link;
+       struct cgrp_cset_link *link, *tmp_link;
  
-       list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
-               list_del(&link->cgrp_link_list);
+       list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) {
+               list_del(&link->cset_link);
                 kfree(link);
         }
  }
  
-/*
- * allocate_cg_links() allocates "count" cg_cgroup_link structures
- * and chains them on tmp through their cgrp_link_list fields. Returns 0 on
- * success or a negative error
+/**
+ * allocate_cgrp_cset_links - allocate cgrp_cset_links
+ * @count: the number of links to allocate
+ * @tmp_links: list_head the allocated links are put on
+ *
+ * Allocate @count cgrp_cset_link structures and chain them on @tmp_links
+ * through ->cset_link.  Returns 0 on success or -errno.
   */
-static int allocate_cg_links(int count, struct list_head *tmp)
+static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links)
  {
-       struct cg_cgroup_link *link;
+       struct cgrp_cset_link *link;
         int i;
-       INIT_LIST_HEAD(tmp);
+
+       INIT_LIST_HEAD(tmp_links);
+
         for (i = 0; i < count; i++) {
-               link = kmalloc(sizeof(*link), GFP_KERNEL);
+               link = kzalloc(sizeof(*link), GFP_KERNEL);
                 if (!link) {
-                       free_cg_links(tmp);
+                       free_cgrp_cset_links(tmp_links);
                         return -ENOMEM;
                 }
-               list_add(&link->cgrp_link_list, tmp);
+               list_add(&link->cset_link, tmp_links);
         }
         return 0;
  }
  
  /**
   * link_css_set - a helper function to link a css_set to a cgroup
- * @tmp_cg_links: cg_cgroup_link objects allocated by allocate_cg_links()
- * @cg: the css_set to be linked
+ * @tmp_links: cgrp_cset_link objects allocated by allocate_cgrp_cset_links()
+ * @cset: the css_set to be linked
   * @cgrp: the destination cgroup
   */
-static void link_css_set(struct list_head *tmp_cg_links,
-                        struct css_set *cg, struct cgroup *cgrp)
+static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
+                        struct cgroup *cgrp)
  {
-       struct cg_cgroup_link *link;
+       struct cgrp_cset_link *link;
  
-       BUG_ON(list_empty(tmp_cg_links));
-       link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
-                               cgrp_link_list);
-       link->cg = cg;
+       BUG_ON(list_empty(tmp_links));
+       link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
+       link->cset = cset;
         link->cgrp = cgrp;
-       atomic_inc(&cgrp->count);
-       list_move(&link->cgrp_link_list, &cgrp->css_sets);
+       list_move(&link->cset_link, &cgrp->cset_links);
         /*
          * Always add links to the tail of the list so that the list
          * is sorted by order of hierarchy creation
          */
-       list_add_tail(&link->cg_link_list, &cg->cg_links);
+       list_add_tail(&link->cgrp_link, &cset->cgrp_links);
  }
  
  /*
@@ -629,67 +615,66 @@ static void link_css_set(struct list_head *tmp_cg_links,
   * substituted into the appropriate hierarchy. Must be called with
   * cgroup_mutex held
   */
-static struct css_set *find_css_set(
-       struct css_set *oldcg, struct cgroup *cgrp)
+static struct css_set *find_css_set(struct css_set *old_cset,
+                                   struct cgroup *cgrp)
  {
-       struct css_set *res;
+       struct css_set *cset;
         struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
-
-       struct list_head tmp_cg_links;
-
-       struct cg_cgroup_link *link;
+       struct list_head tmp_links;
+       struct cgrp_cset_link *link;
         unsigned long key;
  
         /* First see if we already have a cgroup group that matches
          * the desired set */
         read_lock(&css_set_lock);
-       res = find_existing_css_set(oldcg, cgrp, template);
-       if (res)
-               get_css_set(res);
+       cset = find_existing_css_set(old_cset, cgrp, template);
+       if (cset)
+               get_css_set(cset);
         read_unlock(&css_set_lock);
  
-       if (res)
-               return res;
+       if (cset)
+               return cset;
  
-       res = kmalloc(sizeof(*res), GFP_KERNEL);
-       if (!res)
+       cset = kzalloc(sizeof(*cset), GFP_KERNEL);
+       if (!cset)
                 return NULL;
  
-       /* Allocate all the cg_cgroup_link objects that we'll need */
-       if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
-               kfree(res);
+       /* Allocate all the cgrp_cset_link objects that we'll need */
+       if (allocate_cgrp_cset_links(root_count, &tmp_links) < 0) {
+               kfree(cset);
                 return NULL;
         }
  
-       atomic_set(&res->refcount, 1);
-       INIT_LIST_HEAD(&res->cg_links);
-       INIT_LIST_HEAD(&res->tasks);
-       INIT_HLIST_NODE(&res->hlist);
+       atomic_set(&cset->refcount, 1);
+       INIT_LIST_HEAD(&cset->cgrp_links);
+       INIT_LIST_HEAD(&cset->tasks);
+       INIT_HLIST_NODE(&cset->hlist);
  
         /* Copy the set of subsystem state objects generated in
          * find_existing_css_set() */
-       memcpy(res->subsys, template, sizeof(res->subsys));
+       memcpy(cset->subsys, template, sizeof(cset->subsys));
  
         write_lock(&css_set_lock);
         /* Add reference counts and links from the new css_set. */
-       list_for_each_entry(link, &oldcg->cg_links, cg_link_list) {
+       list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
                 struct cgroup *c = link->cgrp;
+
                 if (c->root == cgrp->root)
                         c = cgrp;
-               link_css_set(&tmp_cg_links, res, c);
+               link_css_set(&tmp_links, cset, c);
         }
  
-       BUG_ON(!list_empty(&tmp_cg_links));
+       BUG_ON(!list_empty(&tmp_links));
  
         css_set_count++;
  
         /* Add this cgroup group to the hash table */
-       key = css_set_hash(res->subsys);
-       hash_add(css_set_table, &res->hlist, key);
+       key = css_set_hash(cset->subsys);
+       hash_add(css_set_table, &cset->hlist, key);
  
         write_unlock(&css_set_lock);
  
-       return res;
+       return cset;
  }
  
  /*
@@ -699,7 +684,7 @@ static struct css_set *find_css_set(
  static struct cgroup *task_cgroup_from_root(struct task_struct *task,
                                             struct cgroupfs_root *root)
  {
-       struct css_set *css;
+       struct css_set *cset;
         struct cgroup *res = NULL;
  
         BUG_ON(!mutex_is_locked(&cgroup_mutex));
@@ -709,13 +694,15 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
          * task can't change groups, so the only thing that can happen
          * is that it exits and its css is set back to init_css_set.
          */
-       css = task->cgroups;
-       if (css == &init_css_set) {
+       cset = task->cgroups;
+       if (cset == &init_css_set) {
                 res = &root->top_cgroup;
         } else {
-               struct cg_cgroup_link *link;
-               list_for_each_entry(link, &css->cg_links, cg_link_list) {
+               struct cgrp_cset_link *link;
+
+               list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
                         struct cgroup *c = link->cgrp;
+
                         if (c->root == root) {
                                 res = c;
                                 break;
@@ -828,7 +815,7 @@ static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
  
  static void cgroup_free_fn(struct work_struct *work)
  {
-       struct cgroup *cgrp = container_of(work, struct cgroup, free_work);
+       struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
         struct cgroup_subsys *ss;
  
         mutex_lock(&cgroup_mutex);
@@ -873,7 +860,8 @@ static void cgroup_free_rcu(struct rcu_head *head)
  {
         struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
  
-       schedule_work(&cgrp->free_work);
+       INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
+       schedule_work(&cgrp->destroy_work);
  }
  
  static void cgroup_diput(struct dentry *dentry, struct inode *inode)
@@ -882,7 +870,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
         if (S_ISDIR(inode->i_mode)) {
                 struct cgroup *cgrp = dentry->d_fsdata;
  
-               BUG_ON(!(cgroup_is_removed(cgrp)));
+               BUG_ON(!(cgroup_is_dead(cgrp)));
                 call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
         } else {
                 struct cfent *cfe = __d_cfe(dentry);
@@ -1401,11 +1389,10 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
         INIT_LIST_HEAD(&cgrp->sibling);
         INIT_LIST_HEAD(&cgrp->children);
         INIT_LIST_HEAD(&cgrp->files);
-       INIT_LIST_HEAD(&cgrp->css_sets);
+       INIT_LIST_HEAD(&cgrp->cset_links);
         INIT_LIST_HEAD(&cgrp->allcg_node);
         INIT_LIST_HEAD(&cgrp->release_list);
         INIT_LIST_HEAD(&cgrp->pidlists);
-       INIT_WORK(&cgrp->free_work, cgroup_free_fn);
         mutex_init(&cgrp->pidlist_mutex);
         INIT_LIST_HEAD(&cgrp->event_list);
         spin_lock_init(&cgrp->event_list_lock);
@@ -1426,29 +1413,30 @@ static void init_cgroup_root(struct cgroupfs_root *root)
         list_add_tail(&cgrp->allcg_node, &root->allcg_list);
  }
  
-static bool init_root_id(struct cgroupfs_root *root)
+static int cgroup_init_root_id(struct cgroupfs_root *root)
  {
-       int ret = 0;
+       int id;
  
-       do {
-               if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL))
-                       return false;
-               spin_lock(&hierarchy_id_lock);
-               /* Try to allocate the next unused ID */
-               ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id,
-                                       &root->hierarchy_id);
-               if (ret == -ENOSPC)
-                       /* Try again starting from 0 */
-                       ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id);
-               if (!ret) {
-                       next_hierarchy_id = root->hierarchy_id + 1;
-               } else if (ret != -EAGAIN) {
-                       /* Can only get here if the 31-bit IDR is full ... */
-                       BUG_ON(ret);
-               }
-               spin_unlock(&hierarchy_id_lock);
-       } while (ret);
-       return true;
+       lockdep_assert_held(&cgroup_mutex);
+       lockdep_assert_held(&cgroup_root_mutex);
+
+       id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, 2, 0, GFP_KERNEL);
+       if (id < 0)
+               return id;
+
+       root->hierarchy_id = id;
+       return 0;
+}
+
+static void cgroup_exit_root_id(struct cgroupfs_root *root)
+{
+       lockdep_assert_held(&cgroup_mutex);
+       lockdep_assert_held(&cgroup_root_mutex);
+
+       if (root->hierarchy_id) {
+               idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
+               root->hierarchy_id = 0;
+       }
  }
  
  static int cgroup_test_super(struct super_block *sb, void *data)
@@ -1482,10 +1470,6 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
         if (!root)
                 return ERR_PTR(-ENOMEM);
  
-       if (!init_root_id(root)) {
-               kfree(root);
-               return ERR_PTR(-ENOMEM);
-       }
         init_cgroup_root(root);
  
         root->subsys_mask = opts->subsys_mask;
@@ -1500,17 +1484,15 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
         return root;
  }
  
-static void cgroup_drop_root(struct cgroupfs_root *root)
+static void cgroup_free_root(struct cgroupfs_root *root)
  {
-       if (!root)
-               return;
+       if (root) {
+               /* hierarchy ID should already have been released */
+               WARN_ON_ONCE(root->hierarchy_id);
  
-       BUG_ON(!root->hierarchy_id);
-       spin_lock(&hierarchy_id_lock);
-       ida_remove(&hierarchy_ida, root->hierarchy_id);
-       spin_unlock(&hierarchy_id_lock);
-       ida_destroy(&root->cgroup_ida);
-       kfree(root);
+               ida_destroy(&root->cgroup_ida);
+               kfree(root);
+       }
  }
  
  static int cgroup_set_super(struct super_block *sb, void *data)
@@ -1597,7 +1579,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
         sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts);
         if (IS_ERR(sb)) {
                 ret = PTR_ERR(sb);
-               cgroup_drop_root(opts.new_root);
+               cgroup_free_root(opts.new_root);
                 goto drop_modules;
         }
  
@@ -1605,12 +1587,12 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
         BUG_ON(!root);
         if (root == opts.new_root) {
                 /* We used the new root structure, so this is a new hierarchy */
-               struct list_head tmp_cg_links;
+               struct list_head tmp_links;
                 struct cgroup *root_cgrp = &root->top_cgroup;
                 struct cgroupfs_root *existing_root;
                 const struct cred *cred;
                 int i;
-               struct css_set *cg;
+               struct css_set *cset;
  
                 BUG_ON(sb->s_root != NULL);
  
@@ -1637,13 +1619,17 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                  * that's us. The worst that can happen is that we
                  * have some link structures left over
                  */
-               ret = allocate_cg_links(css_set_count, &tmp_cg_links);
+               ret = allocate_cgrp_cset_links(css_set_count, &tmp_links);
+               if (ret)
+                       goto unlock_drop;
+
+               ret = cgroup_init_root_id(root);
                 if (ret)
                         goto unlock_drop;
  
                 ret = rebind_subsystems(root, root->subsys_mask);
                 if (ret == -EBUSY) {
-                       free_cg_links(&tmp_cg_links);
+                       free_cgrp_cset_links(&tmp_links);
                         goto unlock_drop;
                 }
                 /*
@@ -1664,11 +1650,11 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                 /* Link the top cgroup in this hierarchy into all
                  * the css_set objects */
                 write_lock(&css_set_lock);
-               hash_for_each(css_set_table, i, cg, hlist)
-                       link_css_set(&tmp_cg_links, cg, root_cgrp);
+               hash_for_each(css_set_table, i, cset, hlist)
+                       link_css_set(&tmp_links, cset, root_cgrp);
                 write_unlock(&css_set_lock);
  
-               free_cg_links(&tmp_cg_links);
+               free_cgrp_cset_links(&tmp_links);
  
                 BUG_ON(!list_empty(&root_cgrp->children));
                 BUG_ON(root->number_of_cgroups != 1);
@@ -1684,13 +1670,16 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                  * We re-used an existing hierarchy - the new root (if
                  * any) is not needed
                  */
-               cgroup_drop_root(opts.new_root);
+               cgroup_free_root(opts.new_root);
  
-               if (((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) &&
-                   root->flags != opts.flags) {
-                       pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n");
-                       ret = -EINVAL;
-                       goto drop_new_super;
+               if (root->flags != opts.flags) {
+                       if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) {
+                               pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n");
+                               ret = -EINVAL;
+                               goto drop_new_super;
+                       } else {
+                               pr_warning("cgroup: new mount options do not match the existing superblock, will be ignored\n");
+                       }
                 }
  
                 /* no subsys rebinding, so refcounts don't change */
@@ -1702,6 +1691,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
         return dget(sb->s_root);
  
   unlock_drop:
+       cgroup_exit_root_id(root);
         mutex_unlock(&cgroup_root_mutex);
         mutex_unlock(&cgroup_mutex);
         mutex_unlock(&inode->i_mutex);
@@ -1718,9 +1708,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
  static void cgroup_kill_sb(struct super_block *sb) {
         struct cgroupfs_root *root = sb->s_fs_info;
         struct cgroup *cgrp = &root->top_cgroup;
+       struct cgrp_cset_link *link, *tmp_link;
         int ret;
-       struct cg_cgroup_link *link;
-       struct cg_cgroup_link *saved_link;
  
         BUG_ON(!root);
  
@@ -1736,15 +1725,14 @@ static void cgroup_kill_sb(struct super_block *sb) {
         BUG_ON(ret);
  
         /*
-        * Release all the links from css_sets to this hierarchy's
+        * Release all the links from css_sets to this hierarchy's
          * root cgroup
          */
         write_lock(&css_set_lock);
  
-       list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
-                                cgrp_link_list) {
-               list_del(&link->cg_link_list);
-               list_del(&link->cgrp_link_list);
+       list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
+               list_del(&link->cset_link);
+               list_del(&link->cgrp_link);
                 kfree(link);
         }
         write_unlock(&css_set_lock);
@@ -1754,13 +1742,15 @@ static void cgroup_kill_sb(struct super_block *sb) {
                 root_count--;
         }
  
+       cgroup_exit_root_id(root);
+
         mutex_unlock(&cgroup_root_mutex);
         mutex_unlock(&cgroup_mutex);
  
         simple_xattrs_free(&cgrp->xattrs);
  
         kill_litter_super(sb);
-       cgroup_drop_root(root);
+       cgroup_free_root(root);
  }
  
  static struct file_system_type cgroup_fs_type = {
@@ -1822,6 +1812,38 @@ out:
  }
  EXPORT_SYMBOL_GPL(cgroup_path);
  
+/**
+ * task_cgroup_path_from_hierarchy - cgroup path of a task on a hierarchy
+ * @task: target task
+ * @hierarchy_id: the hierarchy to look up @task's cgroup from
+ * @buf: the buffer to write the path into
+ * @buflen: the length of the buffer
+ *
+ * Determine @task's cgroup on the hierarchy specified by @hierarchy_id and
+ * copy its path into @buf.  This function grabs cgroup_mutex and shouldn't
+ * be used inside locks used by cgroup controller callbacks.
+ */
+int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id,
+                                   char *buf, size_t buflen)
+{
+       struct cgroupfs_root *root;
+       struct cgroup *cgrp = NULL;
+       int ret = -ENOENT;
+
+       mutex_lock(&cgroup_mutex);
+
+       root = idr_find(&cgroup_hierarchy_idr, hierarchy_id);
+       if (root) {
+               cgrp = task_cgroup_from_root(task, root);
+               ret = cgroup_path(cgrp, buf, buflen);
+       }
+
+       mutex_unlock(&cgroup_mutex);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(task_cgroup_path_from_hierarchy);
+
  /*
   * Control Group taskset
   */
@@ -1907,10 +1929,11 @@ EXPORT_SYMBOL_GPL(cgroup_taskset_size);
   *
   * Must be called with cgroup_mutex and threadgroup locked.
   */
-static void cgroup_task_migrate(struct cgroup *oldcgrp,
-                               struct task_struct *tsk, struct css_set *newcg)
+static void cgroup_task_migrate(struct cgroup *old_cgrp,
+                               struct task_struct *tsk,
+                               struct css_set *new_cset)
  {
-       struct css_set *oldcg;
+       struct css_set *old_cset;
  
         /*
          * We are synchronized through threadgroup_lock() against PF_EXITING
@@ -1918,25 +1941,25 @@ static void cgroup_task_migrate(struct cgroup *oldcgrp,
          * css_set to init_css_set and dropping the old one.
          */
         WARN_ON_ONCE(tsk->flags & PF_EXITING);
-       oldcg = tsk->cgroups;
+       old_cset = tsk->cgroups;
  
         task_lock(tsk);
-       rcu_assign_pointer(tsk->cgroups, newcg);
+       rcu_assign_pointer(tsk->cgroups, new_cset);
         task_unlock(tsk);
  
         /* Update the css_set linked lists if we're using them */
         write_lock(&css_set_lock);
         if (!list_empty(&tsk->cg_list))
-               list_move(&tsk->cg_list, &newcg->tasks);
+               list_move(&tsk->cg_list, &new_cset->tasks);
         write_unlock(&css_set_lock);
  
         /*
-        * We just gained a reference on oldcg by taking it from the task. As
-        * trading it for newcg is protected by cgroup_mutex, we're safe to drop
-        * it here; it will be freed under RCU.
+        * We just gained a reference on old_cset by taking it from the
+        * task. As trading it for new_cset is protected by cgroup_mutex,
+        * we're safe to drop it here; it will be freed under RCU.
          */
-       set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
-       put_css_set(oldcg);
+       set_bit(CGRP_RELEASABLE, &old_cgrp->flags);
+       put_css_set(old_cset);
  }
  
  /**
@@ -2320,7 +2343,7 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
         struct cftype *cft = __d_cft(file->f_dentry);
         struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
  
-       if (cgroup_is_removed(cgrp))
+       if (cgroup_is_dead(cgrp))
                 return -ENODEV;
         if (cft->write)
                 return cft->write(cgrp, cft, file, buf, nbytes, ppos);
@@ -2365,7 +2388,7 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf,
         struct cftype *cft = __d_cft(file->f_dentry);
         struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
  
-       if (cgroup_is_removed(cgrp))
+       if (cgroup_is_dead(cgrp))
                 return -ENODEV;
  
         if (cft->read)
@@ -2432,10 +2455,12 @@ static int cgroup_file_open(struct inode *inode, struct file *file)
         cft = __d_cft(file->f_dentry);
  
         if (cft->read_map || cft->read_seq_string) {
-               struct cgroup_seqfile_state *state =
-                       kzalloc(sizeof(*state), GFP_USER);
+               struct cgroup_seqfile_state *state;
+
+               state = kzalloc(sizeof(*state), GFP_USER);
                 if (!state)
                         return -ENOMEM;
+
                 state->cft = cft;
                 state->cgroup = __d_cgrp(file->f_dentry->d_parent);
                 file->f_op = &cgroup_seqfile_operations;
@@ -2483,6 +2508,13 @@ static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
  
         cgrp = __d_cgrp(old_dentry);
  
+       /*
+        * This isn't a proper migration and its usefulness is very
+        * limited.  Disallow if sane_behavior.
+        */
+       if (cgroup_sane_behavior(cgrp))
+               return -EPERM;
+
         name = cgroup_alloc_name(new_dentry);
         if (!name)
                 return -ENOMEM;
@@ -2699,13 +2731,14 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
                 goto out;
         }
  
+       cfe->type = (void *)cft;
+       cfe->dentry = dentry;
+       dentry->d_fsdata = cfe;
+       simple_xattrs_init(&cfe->xattrs);
+
         mode = cgroup_file_mode(cft);
         error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb);
         if (!error) {
-               cfe->type = (void *)cft;
-               cfe->dentry = dentry;
-               dentry->d_fsdata = cfe;
-               simple_xattrs_init(&cfe->xattrs);
                 list_add_tail(&cfe->node, &parent->files);
                 cfe = NULL;
         }
@@ -2765,13 +2798,17 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss,
  {
         LIST_HEAD(pending);
         struct cgroup *cgrp, *n;
+       struct super_block *sb = ss->root->sb;
  
         /* %NULL @cfts indicates abort and don't bother if @ss isn't attached */
-       if (cfts && ss->root != &rootnode) {
+       if (cfts && ss->root != &rootnode &&
+           atomic_inc_not_zero(&sb->s_active)) {
                 list_for_each_entry(cgrp, &ss->root->allcg_list, allcg_node) {
                         dget(cgrp->dentry);
                         list_add_tail(&cgrp->cft_q_node, &pending);
                 }
+       } else {
+               sb = NULL;
         }
  
         mutex_unlock(&cgroup_mutex);
@@ -2785,7 +2822,7 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss,
  
                 mutex_lock(&inode->i_mutex);
                 mutex_lock(&cgroup_mutex);
-               if (!cgroup_is_removed(cgrp))
+               if (!cgroup_is_dead(cgrp))
                         cgroup_addrm_files(cgrp, ss, cfts, is_add);
                 mutex_unlock(&cgroup_mutex);
                 mutex_unlock(&inode->i_mutex);
@@ -2794,6 +2831,9 @@ static void cgroup_cfts_commit(struct cgroup_subsys *ss,
                 dput(cgrp->dentry);
         }
  
+       if (sb)
+               deactivate_super(sb);
+
         mutex_unlock(&cgroup_cft_mutex);
  }
  
@@ -2849,7 +2889,8 @@ int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
  
         list_for_each_entry(set, &ss->cftsets, node) {
                 if (set->cfts == cfts) {
-                       list_del_init(&set->node);
+                       list_del(&set->node);
+                       kfree(set);
                         cgroup_cfts_commit(ss, cfts, false);
                         return 0;
                 }
@@ -2868,12 +2909,11 @@ int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
  int cgroup_task_count(const struct cgroup *cgrp)
  {
         int count = 0;
-       struct cg_cgroup_link *link;
+       struct cgrp_cset_link *link;
  
         read_lock(&css_set_lock);
-       list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
-               count += atomic_read(&link->cg->refcount);
-       }
+       list_for_each_entry(link, &cgrp->cset_links, cset_link)
+               count += atomic_read(&link->cset->refcount);
         read_unlock(&css_set_lock);
         return count;
  }
@@ -2882,25 +2922,24 @@ int cgroup_task_count(const struct cgroup *cgrp)
   * Advance a list_head iterator.  The iterator should be positioned at
   * the start of a css_set
   */
-static void cgroup_advance_iter(struct cgroup *cgrp,
-                               struct cgroup_iter *it)
+static void cgroup_advance_iter(struct cgroup *cgrp, struct cgroup_iter *it)
  {
-       struct list_head *l = it->cg_link;
-       struct cg_cgroup_link *link;
-       struct css_set *cg;
+       struct list_head *l = it->cset_link;
+       struct cgrp_cset_link *link;
+       struct css_set *cset;
  
         /* Advance to the next non-empty css_set */
         do {
                 l = l->next;
-               if (l == &cgrp->css_sets) {
-                       it->cg_link = NULL;
+               if (l == &cgrp->cset_links) {
+                       it->cset_link = NULL;
                         return;
                 }
-               link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
-               cg = link->cg;
-       } while (list_empty(&cg->tasks));
-       it->cg_link = l;
-       it->task = cg->tasks.next;
+               link = list_entry(l, struct cgrp_cset_link, cset_link);
+               cset = link->cset;
+       } while (list_empty(&cset->tasks));
+       it->cset_link = l;
+       it->task = cset->tasks.next;
  }
  
  /*
@@ -2937,6 +2976,56 @@ static void cgroup_enable_task_cg_lists(void)
         write_unlock(&css_set_lock);
  }
  
+/**
+ * cgroup_next_sibling - find the next sibling of a given cgroup
+ * @pos: the current cgroup
+ *
+ * Return the next sibling of @pos, or %NULL if @pos has no next sibling.
+ * Must be called under RCU read lock.  The only requirement is that @pos
+ * is accessible; the next sibling is guaranteed to be returned regardless
+ * of @pos's state.
+ */
+struct cgroup *cgroup_next_sibling(struct cgroup *pos)
+{
+       struct cgroup *next;
+
+       WARN_ON_ONCE(!rcu_read_lock_held());
+
+       /*
+        * @pos could already have been removed.  Once a cgroup is removed,
+        * its ->sibling.next is no longer updated when its next sibling
+        * changes.  As CGRP_DEAD assertion is serialized and happens
+        * before the cgroup is taken off the ->sibling list, if we see it
+        * unasserted, it's guaranteed that the next sibling hasn't
+        * finished its grace period even if it's already removed, and thus
+        * safe to dereference from this RCU critical section.  If
+        * ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed
+        * to be visible as %true here.
+        */
+       if (likely(!cgroup_is_dead(pos))) {
+               next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
+               if (&next->sibling != &pos->parent->children)
+                       return next;
+               return NULL;
+       }
+
+       /*
+        * Can't dereference the next pointer.  Each cgroup is given a
+        * monotonically increasing unique serial number and always
+        * appended to the sibling list, so the next one can be found by
+        * walking the parent's children until we see a cgroup with higher
+        * serial number than @pos's.
+        *
+        * While this path can be slow, it's taken only when either the
+        * current cgroup is removed or iteration and removal race.
+        */
+       list_for_each_entry_rcu(next, &pos->parent->children, sibling)
+               if (next->serial_nr > pos->serial_nr)
+                       return next;
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(cgroup_next_sibling);
+
  /**
   * cgroup_next_descendant_pre - find the next descendant for pre-order walk
   * @pos: the current position (%NULL to initiate traversal)
@@ -2944,6 +3033,11 @@ static void cgroup_enable_task_cg_lists(void)
   *
   * To be used by cgroup_for_each_descendant_pre().  Find the next
   * descendant to visit for pre-order traversal of @cgroup's descendants.
+ *
+ * While this function requires RCU read locking, it doesn't require the
+ * whole traversal to be contained in a single RCU critical section.  This
+ * function will return the correct next descendant as long as both @pos
+ * and @cgroup are accessible and @pos is a descendant of @cgroup.
   */
  struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
                                           struct cgroup *cgroup)
@@ -2953,11 +3047,8 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
         WARN_ON_ONCE(!rcu_read_lock_held());
  
         /* if first iteration, pretend we just visited @cgroup */
-       if (!pos) {
-               if (list_empty(&cgroup->children))
-                       return NULL;
+       if (!pos)
                 pos = cgroup;
-       }
  
         /* visit the first child if exists */
         next = list_first_or_null_rcu(&pos->children, struct cgroup, sibling);
@@ -2965,14 +3056,12 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
                 return next;
  
         /* no child, visit my or the closest ancestor's next sibling */
-       do {
-               next = list_entry_rcu(pos->sibling.next, struct cgroup,
-                                     sibling);
-               if (&next->sibling != &pos->parent->children)
+       while (pos != cgroup) {
+               next = cgroup_next_sibling(pos);
+               if (next)
                         return next;
-
                 pos = pos->parent;
-       } while (pos != cgroup);
+       }
  
         return NULL;
  }
@@ -2985,6 +3074,11 @@ EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre);
   * Return the rightmost descendant of @pos.  If there's no descendant,
   * @pos is returned.  This can be used during pre-order traversal to skip
   * subtree of @pos.
+ *
+ * While this function requires RCU read locking, it doesn't require the
+ * whole traversal to be contained in a single RCU critical section.  This
+ * function will return the correct rightmost descendant as long as @pos is
+ * accessible.
   */
  struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos)
  {
@@ -3024,6 +3118,11 @@ static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos)
   *
   * To be used by cgroup_for_each_descendant_post().  Find the next
   * descendant to visit for post-order traversal of @cgroup's descendants.
+ *
+ * While this function requires RCU read locking, it doesn't require the
+ * whole traversal to be contained in a single RCU critical section.  This
+ * function will return the correct next descendant as long as both @pos
+ * and @cgroup are accessible and @pos is a descendant of @cgroup.
   */
  struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
                                            struct cgroup *cgroup)
@@ -3039,8 +3138,8 @@ struct cgroup *cgroup_next_descendant_post(struct cgroup *pos,
         }
  
         /* if there's an unvisited sibling, visit its leftmost descendant */
-       next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
-       if (&next->sibling != &pos->parent->children)
+       next = cgroup_next_sibling(pos);
+       if (next)
                 return cgroup_leftmost_descendant(next);
  
         /* no sibling left, visit parent */
@@ -3061,7 +3160,7 @@ void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
                 cgroup_enable_task_cg_lists();
  
         read_lock(&css_set_lock);
-       it->cg_link = &cgrp->css_sets;
+       it->cset_link = &cgrp->cset_links;
         cgroup_advance_iter(cgrp, it);
  }
  
@@ -3070,16 +3169,16 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
  {
         struct task_struct *res;
         struct list_head *l = it->task;
-       struct cg_cgroup_link *link;
+       struct cgrp_cset_link *link;
  
         /* If the iterator cg is NULL, we have no tasks */
-       if (!it->cg_link)
+       if (!it->cset_link)
                 return NULL;
         res = list_entry(l, struct task_struct, cg_list);
         /* Advance iterator to find next entry */
         l = l->next;
-       link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
-       if (l == &link->cg->tasks) {
+       link = list_entry(it->cset_link, struct cgrp_cset_link, cset_link);
+       if (l == &link->cset->tasks) {
                 /* We reached the end of this task list - move on to
                  * the next cg_cgroup_link */
                 cgroup_advance_iter(cgrp, it);
@@ -3410,7 +3509,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
                 }
         }
         /* entry not found; create a new one */
-       l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
+       l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
         if (!l) {
                 mutex_unlock(&cgrp->pidlist_mutex);
                 return l;
@@ -3419,8 +3518,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
         down_write(&l->mutex);
         l->key.type = type;
         l->key.ns = get_pid_ns(ns);
-       l->use_count = 0; /* don't increment here */
-       l->list = NULL;
         l->owner = cgrp;
         list_add(&l->links, &cgrp->pidlists);
         mutex_unlock(&cgrp->pidlist_mutex);
@@ -3725,6 +3822,23 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp,
         return 0;
  }
  
+/*
+ * Put the reference on @cgrp's dentry.  When dput() is called
+ * asynchronously, if umount has been done and deactivate_super() in
+ * cgroup_free_fn() then kills the superblock, there's a small window in
+ * which vfs will see the root dentry with non-zero refcnt and trigger BUG().
+ *
+ * That's why we grab ->s_active before dput() and deactivate_super() after.
+ */
+static void cgroup_dput(struct cgroup *cgrp)
+{
+       struct super_block *sb = cgrp->root->sb;
+
+       atomic_inc(&sb->s_active);
+       dput(cgrp->dentry);
+       deactivate_super(sb);
+}
+
  /*
   * Unregister event and free resources.
   *
@@ -3745,7 +3859,7 @@ static void cgroup_event_remove(struct work_struct *work)
  
         eventfd_ctx_put(event->eventfd);
         kfree(event);
-       dput(cgrp->dentry);
+       cgroup_dput(cgrp);
  }
  
  /*
@@ -3932,33 +4046,16 @@ static int cgroup_clone_children_write(struct cgroup *cgrp,
         return 0;
  }
  
-/*
- * for the common functions, 'private' gives the type of file
- */
-/* for hysterical raisins, we can't put this on the older files */
-#define CGROUP_FILE_GENERIC_PREFIX "cgroup."
-static struct cftype files[] = {
+static struct cftype cgroup_base_files[] = {
         {
-               .name = "tasks",
-               .open = cgroup_tasks_open,
-               .write_u64 = cgroup_tasks_write,
-               .release = cgroup_pidlist_release,
-               .mode = S_IRUGO | S_IWUSR,
-       },
-       {
-               .name = CGROUP_FILE_GENERIC_PREFIX "procs",
+               .name = "cgroup.procs",
                 .open = cgroup_procs_open,
                 .write_u64 = cgroup_procs_write,
                 .release = cgroup_pidlist_release,
                 .mode = S_IRUGO | S_IWUSR,
         },
         {
-               .name = "notify_on_release",
-               .read_u64 = cgroup_read_notify_on_release,
-               .write_u64 = cgroup_write_notify_on_release,
-       },
-       {
-               .name = CGROUP_FILE_GENERIC_PREFIX "event_control",
+               .name = "cgroup.event_control",
                 .write_string = cgroup_write_event_control,
                 .mode = S_IWUGO,
         },
@@ -3973,9 +4070,29 @@ static struct cftype files[] = {
                 .flags = CFTYPE_ONLY_ON_ROOT,
                 .read_seq_string = cgroup_sane_behavior_show,
         },
+
+       /*
+        * Historical crazy stuff.  These don't have "cgroup."  prefix and
+        * don't exist if sane_behavior.  If you're depending on these, be
+        * prepared to be burned.
+        */
+       {
+               .name = "tasks",
+               .flags = CFTYPE_INSANE,         /* use "procs" instead */
+               .open = cgroup_tasks_open,
+               .write_u64 = cgroup_tasks_write,
+               .release = cgroup_pidlist_release,
+               .mode = S_IRUGO | S_IWUSR,
+       },
+       {
+               .name = "notify_on_release",
+               .flags = CFTYPE_INSANE,
+               .read_u64 = cgroup_read_notify_on_release,
+               .write_u64 = cgroup_write_notify_on_release,
+       },
         {
                 .name = "release_agent",
-               .flags = CFTYPE_ONLY_ON_ROOT,
+               .flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT,
                 .read_seq_string = cgroup_release_agent_show,
                 .write_string = cgroup_release_agent_write,
                 .max_write_len = PATH_MAX,
@@ -3996,7 +4113,7 @@ static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
         struct cgroup_subsys *ss;
  
         if (base_files) {
-               err = cgroup_addrm_files(cgrp, NULL, files, true);
+               err = cgroup_addrm_files(cgrp, NULL, cgroup_base_files, true);
                 if (err < 0)
                         return err;
         }
@@ -4030,12 +4147,16 @@ static void css_dput_fn(struct work_struct *work)
  {
         struct cgroup_subsys_state *css =
                 container_of(work, struct cgroup_subsys_state, dput_work);
-       struct dentry *dentry = css->cgroup->dentry;
-       struct super_block *sb = dentry->d_sb;
  
-       atomic_inc(&sb->s_active);
-       dput(dentry);
-       deactivate_super(sb);
+       cgroup_dput(css->cgroup);
+}
+
+static void css_release(struct percpu_ref *ref)
+{
+       struct cgroup_subsys_state *css =
+               container_of(ref, struct cgroup_subsys_state, refcnt);
+
+       schedule_work(&css->dput_work);
  }
  
  static void init_cgroup_css(struct cgroup_subsys_state *css,
@@ -4043,7 +4164,6 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
                                struct cgroup *cgrp)
  {
         css->cgroup = cgrp;
-       atomic_set(&css->refcnt, 1);
         css->flags = 0;
         css->id = NULL;
         if (cgrp == dummytop)
@@ -4102,6 +4222,7 @@ static void offline_css(struct cgroup_subsys *ss, struct cgroup *cgrp)
  static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
                              umode_t mode)
  {
+       static atomic64_t serial_nr_cursor = ATOMIC64_INIT(0);
         struct cgroup *cgrp;
         struct cgroup_name *name;
         struct cgroupfs_root *root = parent->root;
@@ -4164,7 +4285,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
                         err = PTR_ERR(css);
                         goto err_free_all;
                 }
+
+               err = percpu_ref_init(&css->refcnt, css_release);
+               if (err)
+                       goto err_free_all;
+
                 init_cgroup_css(css, ss, cgrp);
+
                 if (ss->use_id) {
                         err = alloc_css_id(ss, parent, cgrp);
                         if (err)
@@ -4182,6 +4309,14 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
                 goto err_free_all;
         lockdep_assert_held(&dentry->d_inode->i_mutex);
  
+       /*
+        * Assign a monotonically increasing serial number.  With the list
+        * appending below, it guarantees that sibling cgroups are always
+        * sorted in the ascending serial number order on the parent's
+        * ->children.
+        */
+       cgrp->serial_nr = atomic64_inc_return(&serial_nr_cursor);
+
         /* allocation complete, commit to creation */
         list_add_tail(&cgrp->allcg_node, &root->allcg_list);
         list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
@@ -4221,8 +4356,12 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
  
  err_free_all:
         for_each_subsys(root, ss) {
-               if (cgrp->subsys[ss->subsys_id])
+               struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
+
+               if (css) {
+                       percpu_ref_cancel_init(&css->refcnt);
                         ss->css_free(cgrp);
+               }
         }
         mutex_unlock(&cgroup_mutex);
         /* Release the reference count that we took on the superblock */
@@ -4250,63 +4389,120 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
         return cgroup_create(c_parent, dentry, mode | S_IFDIR);
  }
  
+static void cgroup_css_killed(struct cgroup *cgrp)
+{
+       if (!atomic_dec_and_test(&cgrp->css_kill_cnt))
+               return;
+
+       /* percpu ref's of all css's are killed, kick off the next step */
+       INIT_WORK(&cgrp->destroy_work, cgroup_offline_fn);
+       schedule_work(&cgrp->destroy_work);
+}
+
+static void css_ref_killed_fn(struct percpu_ref *ref)
+{
+       struct cgroup_subsys_state *css =
+               container_of(ref, struct cgroup_subsys_state, refcnt);
+
+       cgroup_css_killed(css->cgroup);
+}
+
+/**
+ * cgroup_destroy_locked - the first stage of cgroup destruction
+ * @cgrp: cgroup to be destroyed
+ *
+ * css's make use of percpu refcnts whose killing latency shouldn't be
+ * exposed to userland and are RCU protected.  Also, cgroup core needs to
+ * guarantee that css_tryget() won't succeed by the time ->css_offline() is
+ * invoked.  To satisfy all the requirements, destruction is implemented in
+ * the following two steps.
+ *
+ * s1. Verify @cgrp can be destroyed and mark it dying.  Remove all
+ *     userland visible parts and start killing the percpu refcnts of
+ *     css's.  Set up so that the next stage will be kicked off once all
+ *     the percpu refcnts are confirmed to be killed.
+ *
+ * s2. Invoke ->css_offline(), mark the cgroup dead and proceed with the
+ *     rest of destruction.  Once all cgroup references are gone, the
+ *     cgroup is RCU-freed.
+ *
+ * This function implements s1.  After this step, @cgrp is gone as far as
+ * the userland is concerned and a new cgroup with the same name may be
+ * created.  As cgroup doesn't care about the names internally, this
+ * doesn't cause any problem.
+ */
  static int cgroup_destroy_locked(struct cgroup *cgrp)
         __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
  {
         struct dentry *d = cgrp->dentry;
-       struct cgroup *parent = cgrp->parent;
         struct cgroup_event *event, *tmp;
         struct cgroup_subsys *ss;
+       bool empty;
  
         lockdep_assert_held(&d->d_inode->i_mutex);
         lockdep_assert_held(&cgroup_mutex);
  
-       if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children))
+       /*
+        * css_set_lock synchronizes access to ->cset_links and prevents
+        * @cgrp from being removed while __put_css_set() is in progress.
+        */
+       read_lock(&css_set_lock);
+       empty = list_empty(&cgrp->cset_links) && list_empty(&cgrp->children);
+       read_unlock(&css_set_lock);
+       if (!empty)
                 return -EBUSY;
  
         /*
-        * Block new css_tryget() by deactivating refcnt and mark @cgrp
-        * removed.  This makes future css_tryget() and child creation
-        * attempts fail thus maintaining the removal conditions verified
-        * above.
+        * Block new css_tryget() by killing css refcnts.  cgroup core
+        * guarantees that, by the time ->css_offline() is invoked, no new
+        * css reference will be given out via css_tryget().  We can't
+        * simply call percpu_ref_kill() and proceed to offlining css's
+        * because percpu_ref_kill() doesn't guarantee that the ref is seen
+        * as killed on all CPUs on return.
+        *
+        * Use percpu_ref_kill_and_confirm() to get notifications as each
+        * css is confirmed to be seen as killed on all CPUs.  The
+        * notification callback keeps track of the number of css's to be
+        * killed and schedules cgroup_offline_fn() to perform the rest of
+        * destruction once the percpu refs of all css's are confirmed to
+        * be killed.
          */
+       atomic_set(&cgrp->css_kill_cnt, 1);
         for_each_subsys(cgrp->root, ss) {
                 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
  
-               WARN_ON(atomic_read(&css->refcnt) < 0);
-               atomic_add(CSS_DEACT_BIAS, &css->refcnt);
-       }
-       set_bit(CGRP_REMOVED, &cgrp->flags);
+               /*
+                * Killing would put the base ref, but we need to keep it
+                * alive until after ->css_offline.
+                */
+               percpu_ref_get(&css->refcnt);
  
-       /* tell subsystems to initate destruction */
-       for_each_subsys(cgrp->root, ss)
-               offline_css(ss, cgrp);
+               atomic_inc(&cgrp->css_kill_cnt);
+               percpu_ref_kill_and_confirm(&css->refcnt, css_ref_killed_fn);
+       }
+       cgroup_css_killed(cgrp);
  
         /*
-        * Put all the base refs.  Each css holds an extra reference to the
-        * cgroup's dentry and cgroup removal proceeds regardless of css
-        * refs.  On the last put of each css, whenever that may be, the
-        * extra dentry ref is put so that dentry destruction happens only
-        * after all css's are released.
+        * Mark @cgrp dead.  This prevents further task migration and child
+        * creation by disabling cgroup_lock_live_group().  Note that
+        * CGRP_DEAD assertion is depended upon by cgroup_next_sibling() to
+        * resume iteration after dropping RCU read lock.  See
+        * cgroup_next_sibling() for details.
          */
-       for_each_subsys(cgrp->root, ss)
-               css_put(cgrp->subsys[ss->subsys_id]);
+       set_bit(CGRP_DEAD, &cgrp->flags);
  
+       /* CGRP_DEAD is set, remove from ->release_list for the last time */
         raw_spin_lock(&release_list_lock);
         if (!list_empty(&cgrp->release_list))
                 list_del_init(&cgrp->release_list);
         raw_spin_unlock(&release_list_lock);
  
-       /* delete this cgroup from parent->children */
-       list_del_rcu(&cgrp->sibling);
-       list_del_init(&cgrp->allcg_node);
-
+       /*
+        * Remove @cgrp directory.  The removal puts the base ref but we
+        * aren't quite done with @cgrp yet, so hold onto it.
+        */
         dget(d);
         cgroup_d_remove_dir(d);
-       dput(d);
-
-       set_bit(CGRP_RELEASABLE, &parent->flags);
-       check_for_release(parent);
  
         /*
          * Unregister events and notify userspace.
@@ -4321,6 +4517,54 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
         spin_unlock(&cgrp->event_list_lock);
  
         return 0;
+}
+
+/**
+ * cgroup_offline_fn - the second step of cgroup destruction
+ * @work: cgroup->destroy_work
+ *
+ * This function is invoked from a work item for a cgroup which is being
+ * destroyed after the percpu refcnts of all css's are guaranteed to be
+ * seen as killed on all CPUs, and performs the rest of destruction.  This
+ * is the second step of destruction described in the comment above
+ * cgroup_destroy_locked().
+ */
+static void cgroup_offline_fn(struct work_struct *work)
+{
+       struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
+       struct cgroup *parent = cgrp->parent;
+       struct dentry *d = cgrp->dentry;
+       struct cgroup_subsys *ss;
+
+       mutex_lock(&cgroup_mutex);
+
+       /*
+        * css_tryget() is guaranteed to fail now.  Tell subsystems to
+        * initiate destruction.
+        */
+       for_each_subsys(cgrp->root, ss)
+               offline_css(ss, cgrp);
+
+       /*
+        * Put the css refs from cgroup_destroy_locked().  Each css holds
+        * an extra reference to the cgroup's dentry and cgroup removal
+        * proceeds regardless of css refs.  On the last put of each css,
+        * whenever that may be, the extra dentry ref is put so that dentry
+        * destruction happens only after all css's are released.
+        */
+       for_each_subsys(cgrp->root, ss)
+               css_put(cgrp->subsys[ss->subsys_id]);
+
+       /* delete this cgroup from parent->children */
+       list_del_rcu(&cgrp->sibling);
+       list_del_init(&cgrp->allcg_node);
+
+       dput(d);
+
+       set_bit(CGRP_RELEASABLE, &parent->flags);
+       check_for_release(parent);
+
+       mutex_unlock(&cgroup_mutex);
  }
  
  static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
@@ -4403,7 +4647,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
         struct cgroup_subsys_state *css;
         int i, ret;
         struct hlist_node *tmp;
-       struct css_set *cg;
+       struct css_set *cset;
         unsigned long key;
  
         /* check name and function validity */
@@ -4470,17 +4714,17 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
          * this is all done under the css_set_lock.
          */
         write_lock(&css_set_lock);
-       hash_for_each_safe(css_set_table, i, tmp, cg, hlist) {
+       hash_for_each_safe(css_set_table, i, tmp, cset, hlist) {
                 /* skip entries that we already rehashed */
-               if (cg->subsys[ss->subsys_id])
+               if (cset->subsys[ss->subsys_id])
                         continue;
                 /* remove existing entry */
-               hash_del(&cg->hlist);
+               hash_del(&cset->hlist);
                 /* set new value */
-               cg->subsys[ss->subsys_id] = css;
+               cset->subsys[ss->subsys_id] = css;
                 /* recompute hash and restore entry */
-               key = css_set_hash(cg->subsys);
-               hash_add(css_set_table, &cg->hlist, key);
+               key = css_set_hash(cset->subsys);
+               hash_add(css_set_table, &cset->hlist, key);
         }
         write_unlock(&css_set_lock);
  
@@ -4510,7 +4754,7 @@ EXPORT_SYMBOL_GPL(cgroup_load_subsys);
   */
  void cgroup_unload_subsys(struct cgroup_subsys *ss)
  {
-       struct cg_cgroup_link *link;
+       struct cgrp_cset_link *link;
  
         BUG_ON(ss->module == NULL);
  
@@ -4539,14 +4783,14 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
          * in loading, we need to pay our respects to the hashtable gods.
          */
         write_lock(&css_set_lock);
-       list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
-               struct css_set *cg = link->cg;
+       list_for_each_entry(link, &dummytop->cset_links, cset_link) {
+               struct css_set *cset = link->cset;
                 unsigned long key;
  
-               hash_del(&cg->hlist);
-               cg->subsys[ss->subsys_id] = NULL;
-               key = css_set_hash(cg->subsys);
-               hash_add(css_set_table, &cg->hlist, key);
+               hash_del(&cset->hlist);
+               cset->subsys[ss->subsys_id] = NULL;
+               key = css_set_hash(cset->subsys);
+               hash_add(css_set_table, &cset->hlist, key);
         }
         write_unlock(&css_set_lock);
  
@@ -4573,7 +4817,7 @@ int __init cgroup_init_early(void)
  {
         int i;
         atomic_set(&init_css_set.refcount, 1);
-       INIT_LIST_HEAD(&init_css_set.cg_links);
+       INIT_LIST_HEAD(&init_css_set.cgrp_links);
         INIT_LIST_HEAD(&init_css_set.tasks);
         INIT_HLIST_NODE(&init_css_set.hlist);
         css_set_count = 1;
@@ -4581,12 +4825,10 @@ int __init cgroup_init_early(void)
         root_count = 1;
         init_task.cgroups = &init_css_set;
  
-       init_css_set_link.cg = &init_css_set;
-       init_css_set_link.cgrp = dummytop;
-       list_add(&init_css_set_link.cgrp_link_list,
-                &rootnode.top_cgroup.css_sets);
-       list_add(&init_css_set_link.cg_link_list,
-                &init_css_set.cg_links);
+       init_cgrp_cset_link.cset = &init_css_set;
+       init_cgrp_cset_link.cgrp = dummytop;
+       list_add(&init_cgrp_cset_link.cset_link, &rootnode.top_cgroup.cset_links);
+       list_add(&init_cgrp_cset_link.cgrp_link, &init_css_set.cgrp_links);
  
         for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
                 struct cgroup_subsys *ss = subsys[i];
@@ -4642,7 +4884,15 @@ int __init cgroup_init(void)
         /* Add init_css_set to the hash table */
         key = css_set_hash(init_css_set.subsys);
         hash_add(css_set_table, &init_css_set.hlist, key);
-       BUG_ON(!init_root_id(&rootnode));
+
+       /* allocate id for the dummy hierarchy */
+       mutex_lock(&cgroup_mutex);
+       mutex_lock(&cgroup_root_mutex);
+
+       BUG_ON(cgroup_init_root_id(&rootnode));
+
+       mutex_unlock(&cgroup_root_mutex);
+       mutex_unlock(&cgroup_mutex);
  
         cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj);
         if (!cgroup_kobj) {
@@ -4885,7 +5135,7 @@ void cgroup_post_fork(struct task_struct *child)
   */
  void cgroup_exit(struct task_struct *tsk, int run_callbacks)
  {
-       struct css_set *cg;
+       struct css_set *cset;
         int i;
  
         /*
@@ -4902,7 +5152,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
  
         /* Reassign the task to the init_css_set. */
         task_lock(tsk);
-       cg = tsk->cgroups;
+       cset = tsk->cgroups;
         tsk->cgroups = &init_css_set;
  
         if (run_callbacks && need_forkexit_callback) {
@@ -4915,7 +5165,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
  
                         if (ss->exit) {
                                 struct cgroup *old_cgrp =
-                                       rcu_dereference_raw(cg->subsys[i])->cgroup;
+                                       rcu_dereference_raw(cset->subsys[i])->cgroup;
                                 struct cgroup *cgrp = task_cgroup(tsk, i);
                                 ss->exit(cgrp, old_cgrp, tsk);
                         }
@@ -4923,15 +5173,13 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
         }
         task_unlock(tsk);
  
-       put_css_set_taskexit(cg);
+       put_css_set_taskexit(cset);
  }
  
  static void check_for_release(struct cgroup *cgrp)
  {
-       /* All of these checks rely on RCU to keep the cgroup
-        * structure alive */
         if (cgroup_is_releasable(cgrp) &&
-           !atomic_read(&cgrp->count) && list_empty(&cgrp->children)) {
+           list_empty(&cgrp->cset_links) && list_empty(&cgrp->children)) {
                 /*
                  * Control Group is currently removeable. If it's not
                  * already queued for a userspace notification, queue
@@ -4940,7 +5188,7 @@ static void check_for_release(struct cgroup *cgrp)
                 int need_schedule_work = 0;
  
                 raw_spin_lock(&release_list_lock);
-               if (!cgroup_is_removed(cgrp) &&
+               if (!cgroup_is_dead(cgrp) &&
                     list_empty(&cgrp->release_list)) {
                         list_add(&cgrp->release_list, &release_list);
                         need_schedule_work = 1;
@@ -4951,34 +5199,6 @@ static void check_for_release(struct cgroup *cgrp)
         }
  }
  
-/* Caller must verify that the css is not for root cgroup */
-bool __css_tryget(struct cgroup_subsys_state *css)
-{
-       while (true) {
-               int t, v;
-
-               v = css_refcnt(css);
-               t = atomic_cmpxchg(&css->refcnt, v, v + 1);
-               if (likely(t == v))
-                       return true;
-               else if (t < 0)
-                       return false;
-               cpu_relax();
-       }
-}
-EXPORT_SYMBOL_GPL(__css_tryget);
-
-/* Caller must verify that the css is not for root cgroup */
-void __css_put(struct cgroup_subsys_state *css)
-{
-       int v;
-
-       v = css_unbias_refcnt(atomic_dec_return(&css->refcnt));
-       if (v == 0)
-               schedule_work(&css->dput_work);
-}
-EXPORT_SYMBOL_GPL(__css_put);
-
  /*
   * Notify userspace when a cgroup is released, by running the
   * configured release agent with the name of the cgroup (path
@@ -5086,9 +5306,7 @@ __setup("cgroup_disable=", cgroup_disable);
   * Functons for CSS ID.
   */
  
-/*
- *To get ID other than 0, this should be called when !cgroup_is_removed().
- */
+/* to get ID other than 0, this should be called when !cgroup_is_dead() */
  unsigned short css_id(struct cgroup_subsys_state *css)
  {
         struct css_id *cssid;
@@ -5098,7 +5316,7 @@ unsigned short css_id(struct cgroup_subsys_state *css)
          * on this or this is under rcu_read_lock(). Once css->id is allocated,
          * it's unchanged until freed.
          */
-       cssid = rcu_dereference_check(css->id, css_refcnt(css));
+       cssid = rcu_dereference_raw(css->id);
  
         if (cssid)
                 return cssid->id;
@@ -5106,18 +5324,6 @@ unsigned short css_id(struct cgroup_subsys_state *css)
  }
  EXPORT_SYMBOL_GPL(css_id);
  
-unsigned short css_depth(struct cgroup_subsys_state *css)
-{
-       struct css_id *cssid;
-
-       cssid = rcu_dereference_check(css->id, css_refcnt(css));
-
-       if (cssid)
-               return cssid->depth;
-       return 0;
-}
-EXPORT_SYMBOL_GPL(css_depth);
-
  /**
   *  css_is_ancestor - test "root" css is an ancestor of "child"
   * @child: the css to be tested.
@@ -5313,11 +5519,6 @@ static void debug_css_free(struct cgroup *cont)
         kfree(cont->subsys[debug_subsys_id]);
  }
  
-static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft)
-{
-       return atomic_read(&cont->count);
-}
-
  static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft)
  {
         return cgroup_task_count(cont);
@@ -5343,13 +5544,13 @@ static int current_css_set_cg_links_read(struct cgroup *cont,
                                          struct cftype *cft,
                                          struct seq_file *seq)
  {
-       struct cg_cgroup_link *link;
-       struct css_set *cg;
+       struct cgrp_cset_link *link;
+       struct css_set *cset;
  
         read_lock(&css_set_lock);
         rcu_read_lock();
-       cg = rcu_dereference(current->cgroups);
-       list_for_each_entry(link, &cg->cg_links, cg_link_list) {
+       cset = rcu_dereference(current->cgroups);
+       list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
                 struct cgroup *c = link->cgrp;
                 const char *name;
  
@@ -5370,15 +5571,15 @@ static int cgroup_css_links_read(struct cgroup *cont,
                                  struct cftype *cft,
                                  struct seq_file *seq)
  {
-       struct cg_cgroup_link *link;
+       struct cgrp_cset_link *link;
  
         read_lock(&css_set_lock);
-       list_for_each_entry(link, &cont->css_sets, cgrp_link_list) {
-               struct css_set *cg = link->cg;
+       list_for_each_entry(link, &cont->cset_links, cset_link) {
+               struct css_set *cset = link->cset;
                 struct task_struct *task;
                 int count = 0;
-               seq_printf(seq, "css_set %p\n", cg);
-               list_for_each_entry(task, &cg->tasks, cg_list) {
+               seq_printf(seq, "css_set %p\n", cset);
+               list_for_each_entry(task, &cset->tasks, cg_list) {
                         if (count++ > MAX_TASKS_SHOWN_PER_CSS) {
                                 seq_puts(seq, "  ...\n");
                                 break;
@@ -5398,10 +5599,6 @@ static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft)
  }
  
  static struct cftype debug_files[] =  {
-       {
-               .name = "cgroup_refcount",
-               .read_u64 = cgroup_refcount_read,
-       },
         {
                 .name = "taskcount",
                 .read_u64 = debug_taskcount_read,