methods are create/destroy. Any others that are null are presumed to
be successful no-ops.
-struct cgroup_subsys_state *create(struct cgroup_subsys *ss,
- struct cgroup *cgrp)
+struct cgroup_subsys_state *create(struct cgroup *cgrp)
(cgroup_mutex held by caller)
Called to create a subsystem state object for a cgroup. The
it's the root of the hierarchy) and may be an appropriate place for
initialization code.
-void destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+void destroy(struct cgroup *cgrp)
(cgroup_mutex held by caller)
The cgroup system is about to destroy the passed cgroup; the subsystem
newly-created cgroup if an error occurs after this subsystem's
create() method has been called for the new cgroup).
-int pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
+int pre_destroy(struct cgroup *cgrp);
Called before checking the reference count on each subsystem. This may
be useful for subsystems which have some extra references even if
rmdir() will fail with it. From this behavior, pre_destroy() can be
called multiple times against a cgroup.
-int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup_taskset *tset)
+int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
(cgroup_mutex held by caller)
Called prior to moving one or more tasks into a cgroup; if the
while the caller holds cgroup_mutex and it is ensured that either
attach() or cancel_attach() will be called in future.
-void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup_taskset *tset)
+void cancel_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
(cgroup_mutex held by caller)
Called when a task attach operation has failed after can_attach() has succeeded.
This will be called only about subsystems whose can_attach() operation have
succeeded. The parameters are identical to can_attach().
-void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup_taskset *tset)
+void attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
(cgroup_mutex held by caller)
Called after the task has been attached to the cgroup, to allow any
post-attachment activity that requires memory allocations or blocking.
The parameters are identical to can_attach().
-void fork(struct cgroup_subsy *ss, struct task_struct *task)
+void fork(struct task_struct *task)
Called when a task is forked into a cgroup.
-void exit(struct cgroup_subsys *ss, struct task_struct *task)
+void exit(struct cgroup *cgrp, struct cgroup *old_cgrp, struct task_struct *task)
Called during task exit.
-int populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
+int populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
(cgroup_mutex held by caller)
Called after creation of a cgroup to allow a subsystem to populate
method can return an error code, the error code is currently not
always handled well.
-void post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp)
+void post_clone(struct cgroup *cgrp)
(cgroup_mutex held by caller)
Called during cgroup_create() to do any parameter
example in cpusets, no task may attach before 'cpus' and 'mems' are set
up.
-void bind(struct cgroup_subsys *ss, struct cgroup *root)
+void bind(struct cgroup *root)
(cgroup_mutex and ss->hierarchy_mutex held by caller)
Called when a cgroup subsystem is rebound to a different hierarchy
struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);
-static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
- struct cgroup *);
-static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
- struct cgroup_taskset *);
-static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
- struct cgroup_taskset *);
-static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
+static struct cgroup_subsys_state *blkiocg_create(struct cgroup *);
+static int blkiocg_can_attach(struct cgroup *, struct cgroup_taskset *);
+static void blkiocg_attach(struct cgroup *, struct cgroup_taskset *);
+static void blkiocg_destroy(struct cgroup *);
static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
/* for encoding cft->private value on file */
ARRAY_SIZE(blkio_files));
}
-static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
+static void blkiocg_destroy(struct cgroup *cgroup)
{
struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
unsigned long flags;
kfree(blkcg);
}
-static struct cgroup_subsys_state *
-blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
+static struct cgroup_subsys_state *blkiocg_create(struct cgroup *cgroup)
{
struct blkio_cgroup *blkcg;
struct cgroup *parent = cgroup->parent;
* of the main cic data structures. For now we allow a task to change
* its cgroup only if it's the only owner of its ioc.
*/
-static int blkiocg_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup_taskset *tset)
+static int blkiocg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
struct task_struct *task;
struct io_context *ioc;
return ret;
}
-static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup_taskset *tset)
+static void blkiocg_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
struct task_struct *task;
struct io_context *ioc;
CGRP_CLONE_CHILDREN,
};
-/* which pidlist file are we talking about? */
-enum cgroup_filetype {
- CGROUP_FILE_PROCS,
- CGROUP_FILE_TASKS,
-};
-
-/*
- * A pidlist is a list of pids that virtually represents the contents of one
- * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists,
- * a pair (one each for procs, tasks) for each pid namespace that's relevant
- * to the cgroup.
- */
-struct cgroup_pidlist {
- /*
- * used to find which pidlist is wanted. doesn't change as long as
- * this particular list stays in the list.
- */
- struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
- /* array of xids */
- pid_t *list;
- /* how many elements the above list has */
- int length;
- /* how many files are using the current array */
- int use_count;
- /* each of these stored in a list by its cgroup */
- struct list_head links;
- /* pointer to the cgroup we belong to, for list removal purposes */
- struct cgroup *owner;
- /* protects the other fields */
- struct rw_semaphore mutex;
-};
-
struct cgroup {
unsigned long flags; /* "unsigned long" so bitops work */
*/
struct cgroup_subsys {
- struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss,
- struct cgroup *cgrp);
- int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
- void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
- int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup_taskset *tset);
- void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup_taskset *tset);
- void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup_taskset *tset);
- void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
- void (*exit)(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup *old_cgrp, struct task_struct *task);
- int (*populate)(struct cgroup_subsys *ss,
- struct cgroup *cgrp);
- void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp);
- void (*bind)(struct cgroup_subsys *ss, struct cgroup *root);
+ struct cgroup_subsys_state *(*create)(struct cgroup *cgrp);
+ int (*pre_destroy)(struct cgroup *cgrp);
+ void (*destroy)(struct cgroup *cgrp);
+ int (*can_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
+ void (*cancel_attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
+ void (*attach)(struct cgroup *cgrp, struct cgroup_taskset *tset);
+ void (*fork)(struct task_struct *task);
+ void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp,
+ struct task_struct *task);
+ int (*populate)(struct cgroup_subsys *ss, struct cgroup *cgrp);
+ void (*post_clone)(struct cgroup *cgrp);
+ void (*bind)(struct cgroup *root);
int subsys_id;
int active;
int cgroup_attach_task(struct cgroup *, struct task_struct *);
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
-static inline int cgroup_attach_task_current_cg(struct task_struct *tsk)
-{
- return cgroup_attach_task_all(current, tsk);
-}
-
/*
* CSS ID is ID for cgroup_subsys_state structs under subsys. This only works
* if cgroup_subsys.use_id == true. It can be used for looking up and scanning.
{
return 0;
}
-static inline int cgroup_attach_task_current_cg(struct task_struct *t)
-{
- return 0;
-}
#endif /* !CONFIG_CGROUPS */
struct cgroup_subsys;
#ifdef CONFIG_NET
int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss);
-void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss);
+void mem_cgroup_sockets_destroy(struct cgroup *cgrp);
#else
static inline
int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss)
return 0;
}
static inline
-void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss)
+void mem_cgroup_sockets_destroy(struct cgroup *cgrp)
{
}
#endif
*/
int (*init_cgroup)(struct cgroup *cgrp,
struct cgroup_subsys *ss);
- void (*destroy_cgroup)(struct cgroup *cgrp,
- struct cgroup_subsys *ss);
+ void (*destroy_cgroup)(struct cgroup *cgrp);
struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg);
#endif
};
struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg);
int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss);
-void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss);
+void tcp_destroy_cgroup(struct cgroup *cgrp);
unsigned long long tcp_max_memory(const struct mem_cgroup *memcg);
void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx);
#endif /* _TCP_MEMCG_H */
for_each_subsys(cgrp->root, ss)
if (ss->pre_destroy) {
- ret = ss->pre_destroy(ss, cgrp);
+ ret = ss->pre_destroy(cgrp);
if (ret)
break;
}
* Release the subsystem state objects.
*/
for_each_subsys(cgrp->root, ss)
- ss->destroy(ss, cgrp);
+ ss->destroy(cgrp);
cgrp->root->number_of_cgroups--;
mutex_unlock(&cgroup_mutex);
list_move(&ss->sibling, &root->subsys_list);
ss->root = root;
if (ss->bind)
- ss->bind(ss, cgrp);
+ ss->bind(cgrp);
mutex_unlock(&ss->hierarchy_mutex);
/* refcount was already taken, and we're keeping it */
} else if (bit & removed_bits) {
BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
mutex_lock(&ss->hierarchy_mutex);
if (ss->bind)
- ss->bind(ss, dummytop);
+ ss->bind(dummytop);
dummytop->subsys[i]->cgroup = dummytop;
cgrp->subsys[i] = NULL;
subsys[i]->root = &rootnode;
struct task_and_cgroup {
struct task_struct *task;
struct cgroup *cgrp;
+ struct css_set *cg;
};
struct cgroup_taskset {
* will already exist. If not set, this function might sleep, and can fail with
* -ENOMEM. Must be called with cgroup_mutex and threadgroup locked.
*/
-static int cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
- struct task_struct *tsk, bool guarantee)
+static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
+ struct task_struct *tsk, struct css_set *newcg)
{
struct css_set *oldcg;
- struct css_set *newcg;
/*
* We are synchronized through threadgroup_lock() against PF_EXITING
WARN_ON_ONCE(tsk->flags & PF_EXITING);
oldcg = tsk->cgroups;
- /* locate or allocate a new css_set for this task. */
- if (guarantee) {
- /* we know the css_set we want already exists. */
- struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
- read_lock(&css_set_lock);
- newcg = find_existing_css_set(oldcg, cgrp, template);
- BUG_ON(!newcg);
- get_css_set(newcg);
- read_unlock(&css_set_lock);
- } else {
- might_sleep();
- /* find_css_set will give us newcg already referenced. */
- newcg = find_css_set(oldcg, cgrp);
- if (!newcg)
- return -ENOMEM;
- }
-
task_lock(tsk);
rcu_assign_pointer(tsk->cgroups, newcg);
task_unlock(tsk);
put_css_set(oldcg);
set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
- return 0;
}
/**
struct cgroup *oldcgrp;
struct cgroupfs_root *root = cgrp->root;
struct cgroup_taskset tset = { };
+ struct css_set *newcg;
/* @tsk either already exited or can't exit until the end */
if (tsk->flags & PF_EXITING)
for_each_subsys(root, ss) {
if (ss->can_attach) {
- retval = ss->can_attach(ss, cgrp, &tset);
+ retval = ss->can_attach(cgrp, &tset);
if (retval) {
/*
* Remember on which subsystem the can_attach()
}
}
- retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
- if (retval)
+ newcg = find_css_set(tsk->cgroups, cgrp);
+ if (!newcg) {
+ retval = -ENOMEM;
goto out;
+ }
+
+ cgroup_task_migrate(cgrp, oldcgrp, tsk, newcg);
for_each_subsys(root, ss) {
if (ss->attach)
- ss->attach(ss, cgrp, &tset);
+ ss->attach(cgrp, &tset);
}
synchronize_rcu();
*/
break;
if (ss->cancel_attach)
- ss->cancel_attach(ss, cgrp, &tset);
+ ss->cancel_attach(cgrp, &tset);
}
}
return retval;
}
EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
-/*
- * cgroup_attach_proc works in two stages, the first of which prefetches all
- * new css_sets needed (to make sure we have enough memory before committing
- * to the move) and stores them in a list of entries of the following type.
- * TODO: possible optimization: use css_set->rcu_head for chaining instead
- */
-struct cg_list_entry {
- struct css_set *cg;
- struct list_head links;
-};
-
-static bool css_set_check_fetched(struct cgroup *cgrp,
- struct task_struct *tsk, struct css_set *cg,
- struct list_head *newcg_list)
-{
- struct css_set *newcg;
- struct cg_list_entry *cg_entry;
- struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
-
- read_lock(&css_set_lock);
- newcg = find_existing_css_set(cg, cgrp, template);
- read_unlock(&css_set_lock);
-
- /* doesn't exist at all? */
- if (!newcg)
- return false;
- /* see if it's already in the list */
- list_for_each_entry(cg_entry, newcg_list, links)
- if (cg_entry->cg == newcg)
- return true;
-
- /* not found */
- return false;
-}
-
-/*
- * Find the new css_set and store it in the list in preparation for moving the
- * given task to the given cgroup. Returns 0 or -ENOMEM.
- */
-static int css_set_prefetch(struct cgroup *cgrp, struct css_set *cg,
- struct list_head *newcg_list)
-{
- struct css_set *newcg;
- struct cg_list_entry *cg_entry;
-
- /* ensure a new css_set will exist for this thread */
- newcg = find_css_set(cg, cgrp);
- if (!newcg)
- return -ENOMEM;
- /* add it to the list */
- cg_entry = kmalloc(sizeof(struct cg_list_entry), GFP_KERNEL);
- if (!cg_entry) {
- put_css_set(newcg);
- return -ENOMEM;
- }
- cg_entry->cg = newcg;
- list_add(&cg_entry->links, newcg_list);
- return 0;
-}
-
/**
* cgroup_attach_proc - attach all threads in a threadgroup to a cgroup
* @cgrp: the cgroup to attach to
int retval, i, group_size;
struct cgroup_subsys *ss, *failed_ss = NULL;
/* guaranteed to be initialized later, but the compiler needs this */
- struct css_set *oldcg;
struct cgroupfs_root *root = cgrp->root;
/* threadgroup list cursor and array */
struct task_struct *tsk;
struct task_and_cgroup *tc;
struct flex_array *group;
struct cgroup_taskset tset = { };
- /*
- * we need to make sure we have css_sets for all the tasks we're
- * going to move -before- we actually start moving them, so that in
- * case we get an ENOMEM we can bail out before making any changes.
- */
- struct list_head newcg_list;
- struct cg_list_entry *cg_entry, *temp_nobe;
/*
* step 0: in order to do expensive, possibly blocking operations for
if (retval)
goto out_free_group_list;
- /* prevent changes to the threadgroup list while we take a snapshot. */
- read_lock(&tasklist_lock);
- if (!thread_group_leader(leader)) {
- /*
- * a race with de_thread from another thread's exec() may strip
- * us of our leadership, making while_each_thread unsafe to use
- * on this task. if this happens, there is no choice but to
- * throw this task away and try again (from cgroup_procs_write);
- * this is "double-double-toil-and-trouble-check locking".
- */
- read_unlock(&tasklist_lock);
- retval = -EAGAIN;
- goto out_free_group_list;
- }
-
tsk = leader;
i = 0;
+ /*
+ * Prevent freeing of tasks while we take a snapshot. Tasks that are
+ * already PF_EXITING could be freed from underneath us unless we
+ * take an rcu_read_lock.
+ */
+ rcu_read_lock();
do {
struct task_and_cgroup ent;
/* as per above, nr_threads may decrease, but not increase. */
BUG_ON(i >= group_size);
- /*
- * saying GFP_ATOMIC has no effect here because we did prealloc
- * earlier, but it's good form to communicate our expectations.
- */
ent.task = tsk;
ent.cgrp = task_cgroup_from_root(tsk, root);
/* nothing to do if this task is already in the cgroup */
if (ent.cgrp == cgrp)
continue;
+ /*
+ * saying GFP_ATOMIC has no effect here because we did prealloc
+ * earlier, but it's good form to communicate our expectations.
+ */
retval = flex_array_put(group, i, &ent, GFP_ATOMIC);
BUG_ON(retval != 0);
i++;
} while_each_thread(leader, tsk);
+ rcu_read_unlock();
/* remember the number of threads in the array for later. */
group_size = i;
tset.tc_array = group;
tset.tc_array_len = group_size;
- read_unlock(&tasklist_lock);
/* methods shouldn't be called if no task is actually migrating */
retval = 0;
*/
for_each_subsys(root, ss) {
if (ss->can_attach) {
- retval = ss->can_attach(ss, cgrp, &tset);
+ retval = ss->can_attach(cgrp, &tset);
if (retval) {
failed_ss = ss;
goto out_cancel_attach;
* step 2: make sure css_sets exist for all threads to be migrated.
* we use find_css_set, which allocates a new one if necessary.
*/
- INIT_LIST_HEAD(&newcg_list);
for (i = 0; i < group_size; i++) {
tc = flex_array_get(group, i);
- oldcg = tc->task->cgroups;
-
- /* if we don't already have it in the list get a new one */
- if (!css_set_check_fetched(cgrp, tc->task, oldcg,
- &newcg_list)) {
- retval = css_set_prefetch(cgrp, oldcg, &newcg_list);
- if (retval)
- goto out_list_teardown;
+ tc->cg = find_css_set(tc->task->cgroups, cgrp);
+ if (!tc->cg) {
+ retval = -ENOMEM;
+ goto out_put_css_set_refs;
}
}
*/
for (i = 0; i < group_size; i++) {
tc = flex_array_get(group, i);
- retval = cgroup_task_migrate(cgrp, tc->cgrp, tc->task, true);
- BUG_ON(retval);
+ cgroup_task_migrate(cgrp, tc->cgrp, tc->task, tc->cg);
}
/* nothing is sensitive to fork() after this point. */
*/
for_each_subsys(root, ss) {
if (ss->attach)
- ss->attach(ss, cgrp, &tset);
+ ss->attach(cgrp, &tset);
}
/*
synchronize_rcu();
cgroup_wakeup_rmdir_waiter(cgrp);
retval = 0;
-out_list_teardown:
- /* clean up the list of prefetched css_sets. */
- list_for_each_entry_safe(cg_entry, temp_nobe, &newcg_list, links) {
- list_del(&cg_entry->links);
- put_css_set(cg_entry->cg);
- kfree(cg_entry);
+out_put_css_set_refs:
+ if (retval) {
+ for (i = 0; i < group_size; i++) {
+ tc = flex_array_get(group, i);
+ if (!tc->cg)
+ break;
+ put_css_set(tc->cg);
+ }
}
out_cancel_attach:
- /* same deal as in cgroup_attach_task */
if (retval) {
for_each_subsys(root, ss) {
if (ss == failed_ss)
break;
if (ss->cancel_attach)
- ss->cancel_attach(ss, cgrp, &tset);
+ ss->cancel_attach(cgrp, &tset);
}
}
out_free_group_list:
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
+retry_find_task:
+ rcu_read_lock();
if (pid) {
- rcu_read_lock();
tsk = find_task_by_vpid(pid);
if (!tsk) {
rcu_read_unlock();
- cgroup_unlock();
- return -ESRCH;
- }
- if (threadgroup) {
- /*
- * RCU protects this access, since tsk was found in the
- * tid map. a race with de_thread may cause group_leader
- * to stop being the leader, but cgroup_attach_proc will
- * detect it later.
- */
- tsk = tsk->group_leader;
+ ret= -ESRCH;
+ goto out_unlock_cgroup;
}
/*
* even if we're attaching all tasks in the thread group, we
cred->euid != tcred->uid &&
cred->euid != tcred->suid) {
rcu_read_unlock();
- cgroup_unlock();
- return -EACCES;
+ ret = -EACCES;
+ goto out_unlock_cgroup;
}
- get_task_struct(tsk);
- rcu_read_unlock();
- } else {
- if (threadgroup)
- tsk = current->group_leader;
- else
- tsk = current;
- get_task_struct(tsk);
- }
-
- threadgroup_lock(tsk);
+ } else
+ tsk = current;
if (threadgroup)
+ tsk = tsk->group_leader;
+ get_task_struct(tsk);
+ rcu_read_unlock();
+
+ threadgroup_lock(tsk);
+ if (threadgroup) {
+ if (!thread_group_leader(tsk)) {
+ /*
+ * A race with de_thread() from another thread's exec()
+ * may strip us of our leadership. If this happens,
+ * there is no choice but to throw this task away and
+ * try again; this is
+ * "double-double-toil-and-trouble-check locking".
+ */
+ threadgroup_unlock(tsk);
+ put_task_struct(tsk);
+ goto retry_find_task;
+ }
ret = cgroup_attach_proc(cgrp, tsk);
- else
+ } else
ret = cgroup_attach_task(cgrp, tsk);
-
threadgroup_unlock(tsk);
put_task_struct(tsk);
+out_unlock_cgroup:
cgroup_unlock();
return ret;
}
static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid)
{
- int ret;
- do {
- /*
- * attach_proc fails with -EAGAIN if threadgroup leadership
- * changes in the middle of the operation, in which case we need
- * to find the task_struct for the new leader and start over.
- */
- ret = attach_task_by_pid(cgrp, tgid, true);
- } while (ret == -EAGAIN);
- return ret;
+ return attach_task_by_pid(cgrp, tgid, true);
}
/**
* using their cgroups capability, we don't maintain the lists running
* through each css_set to its tasks until we see the list actually
* used - in other words after the first call to cgroup_iter_start().
- *
- * The tasklist_lock is not held here, as do_each_thread() and
- * while_each_thread() are protected by RCU.
*/
static void cgroup_enable_task_cg_lists(void)
{
struct task_struct *p, *g;
write_lock(&css_set_lock);
use_task_css_set_links = 1;
+ /*
+ * We need tasklist_lock because RCU is not safe against
+ * while_each_thread(). Besides, a forking task that has passed
+ * cgroup_post_fork() without seeing use_task_css_set_links = 1
+ * is not guaranteed to have its child immediately visible in the
+ * tasklist if we walk through it with RCU.
+ */
+ read_lock(&tasklist_lock);
do_each_thread(g, p) {
task_lock(p);
/*
list_add(&p->cg_list, &p->cgroups->tasks);
task_unlock(p);
} while_each_thread(g, p);
+ read_unlock(&tasklist_lock);
write_unlock(&css_set_lock);
}
*
*/
+/* which pidlist file are we talking about? */
+enum cgroup_filetype {
+ CGROUP_FILE_PROCS,
+ CGROUP_FILE_TASKS,
+};
+
+/*
+ * A pidlist is a list of pids that virtually represents the contents of one
+ * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists,
+ * a pair (one each for procs, tasks) for each pid namespace that's relevant
+ * to the cgroup.
+ */
+struct cgroup_pidlist {
+ /*
+ * used to find which pidlist is wanted. doesn't change as long as
+ * this particular list stays in the list.
+ */
+ struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
+ /* array of xids */
+ pid_t *list;
+ /* how many elements the above list has */
+ int length;
+ /* how many files are using the current array */
+ int use_count;
+ /* each of these stored in a list by its cgroup */
+ struct list_head links;
+ /* pointer to the cgroup we belong to, for list removal purposes */
+ struct cgroup *owner;
+ /* protects the other fields */
+ struct rw_semaphore mutex;
+};
+
/*
* The following two functions "fix" the issue where there are more pids
* than kmalloc will give memory for; in such cases, we use vmalloc/vfree.
set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
for_each_subsys(root, ss) {
- struct cgroup_subsys_state *css = ss->create(ss, cgrp);
+ struct cgroup_subsys_state *css = ss->create(cgrp);
if (IS_ERR(css)) {
err = PTR_ERR(css);
}
/* At error, ->destroy() callback has to free assigned ID. */
if (clone_children(parent) && ss->post_clone)
- ss->post_clone(ss, cgrp);
+ ss->post_clone(cgrp);
}
cgroup_lock_hierarchy(root);
for_each_subsys(root, ss) {
if (cgrp->subsys[ss->subsys_id])
- ss->destroy(ss, cgrp);
+ ss->destroy(cgrp);
}
mutex_unlock(&cgroup_mutex);
/* Create the top cgroup state for this subsystem */
list_add(&ss->sibling, &rootnode.subsys_list);
ss->root = &rootnode;
- css = ss->create(ss, dummytop);
+ css = ss->create(dummytop);
/* We don't handle early failures gracefully */
BUG_ON(IS_ERR(css));
init_cgroup_css(css, ss, dummytop);
* no ss->create seems to need anything important in the ss struct, so
* this can happen first (i.e. before the rootnode attachment).
*/
- css = ss->create(ss, dummytop);
+ css = ss->create(dummytop);
if (IS_ERR(css)) {
/* failure case - need to deassign the subsys[] slot. */
subsys[i] = NULL;
int ret = cgroup_init_idr(ss, css);
if (ret) {
dummytop->subsys[ss->subsys_id] = NULL;
- ss->destroy(ss, dummytop);
+ ss->destroy(dummytop);
subsys[i] = NULL;
mutex_unlock(&cgroup_mutex);
return ret;
* pointer to find their state. note that this also takes care of
* freeing the css_id.
*/
- ss->destroy(ss, dummytop);
+ ss->destroy(dummytop);
dummytop->subsys[ss->subsys_id] = NULL;
mutex_unlock(&cgroup_mutex);
for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
if (ss->fork)
- ss->fork(ss, child);
+ ss->fork(child);
}
}
}
*/
void cgroup_post_fork(struct task_struct *child)
{
+ /*
+ * use_task_css_set_links is set to 1 before we walk the tasklist
+ * under the tasklist_lock and we read it here after we added the child
+ * to the tasklist under the tasklist_lock as well. If the child wasn't
+ * yet in the tasklist when we walked through it from
+ * cgroup_enable_task_cg_lists(), then use_task_css_set_links value
+ * should be visible now due to the paired locking and barriers implied
+ * by LOCK/UNLOCK: it is written before the tasklist_lock unlock
+ * in cgroup_enable_task_cg_lists() and read here after the tasklist_lock
+ * lock on fork.
+ */
if (use_task_css_set_links) {
write_lock(&css_set_lock);
if (list_empty(&child->cg_list)) {
struct cgroup *old_cgrp =
rcu_dereference_raw(cg->subsys[i])->cgroup;
struct cgroup *cgrp = task_cgroup(tsk, i);
- ss->exit(ss, cgrp, old_cgrp, tsk);
+ ss->exit(cgrp, old_cgrp, tsk);
}
}
}
}
#ifdef CONFIG_CGROUP_DEBUG
-static struct cgroup_subsys_state *debug_create(struct cgroup_subsys *ss,
- struct cgroup *cont)
+static struct cgroup_subsys_state *debug_create(struct cgroup *cont)
{
struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
return css;
}
-static void debug_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+static void debug_destroy(struct cgroup *cont)
{
kfree(cont->subsys[debug_subsys_id]);
}
* task->alloc_lock (inside __thaw_task(), prevents race with refrigerator())
* sighand->siglock
*/
-static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
- struct cgroup *cgroup)
+static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup)
{
struct freezer *freezer;
return &freezer->css;
}
-static void freezer_destroy(struct cgroup_subsys *ss,
- struct cgroup *cgroup)
+static void freezer_destroy(struct cgroup *cgroup)
{
struct freezer *freezer = cgroup_freezer(cgroup);
* a write to that file racing against an attach, and hence the
* can_attach() result will remain valid until the attach completes.
*/
-static int freezer_can_attach(struct cgroup_subsys *ss,
- struct cgroup *new_cgroup,
+static int freezer_can_attach(struct cgroup *new_cgroup,
struct cgroup_taskset *tset)
{
struct freezer *freezer;
return 0;
}
-static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
+static void freezer_fork(struct task_struct *task)
{
struct freezer *freezer;
static nodemask_t cpuset_attach_nodemask_to;
/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
-static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup_taskset *tset)
+static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
struct cpuset *cs = cgroup_cs(cgrp);
struct task_struct *task;
return 0;
}
-static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup_taskset *tset)
+static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
struct mm_struct *mm;
struct task_struct *task;
* (and likewise for mems) to the new cgroup. Called with cgroup_mutex
* held.
*/
-static void cpuset_post_clone(struct cgroup_subsys *ss,
- struct cgroup *cgroup)
+static void cpuset_post_clone(struct cgroup *cgroup)
{
struct cgroup *parent, *child;
struct cpuset *cs, *parent_cs;
/*
* cpuset_create - create a cpuset
- * ss: cpuset cgroup subsystem
* cont: control group that the new cpuset will be part of
*/
-static struct cgroup_subsys_state *cpuset_create(
- struct cgroup_subsys *ss,
- struct cgroup *cont)
+static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont)
{
struct cpuset *cs;
struct cpuset *parent;
* will call async_rebuild_sched_domains().
*/
-static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+static void cpuset_destroy(struct cgroup *cont)
{
struct cpuset *cs = cgroup_cs(cont);
device_initcall(perf_event_sysfs_init);
#ifdef CONFIG_CGROUP_PERF
-static struct cgroup_subsys_state *perf_cgroup_create(
- struct cgroup_subsys *ss, struct cgroup *cont)
+static struct cgroup_subsys_state *perf_cgroup_create(struct cgroup *cont)
{
struct perf_cgroup *jc;
return &jc->css;
}
-static void perf_cgroup_destroy(struct cgroup_subsys *ss,
- struct cgroup *cont)
+static void perf_cgroup_destroy(struct cgroup *cont)
{
struct perf_cgroup *jc;
jc = container_of(cgroup_subsys_state(cont, perf_subsys_id),
return 0;
}
-static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup_taskset *tset)
+static void perf_cgroup_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
struct task_struct *task;
task_function_call(task, __perf_cgroup_move, task);
}
-static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup *old_cgrp, struct task_struct *task)
+static void perf_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp,
+ struct task_struct *task)
{
/*
* cgroup_exit() is called in the copy_process() failure path.
struct task_group, css);
}
-static struct cgroup_subsys_state *
-cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
+static struct cgroup_subsys_state *cpu_cgroup_create(struct cgroup *cgrp)
{
struct task_group *tg, *parent;
return &tg->css;
}
-static void
-cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+static void cpu_cgroup_destroy(struct cgroup *cgrp)
{
struct task_group *tg = cgroup_tg(cgrp);
sched_destroy_group(tg);
}
-static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+static int cpu_cgroup_can_attach(struct cgroup *cgrp,
struct cgroup_taskset *tset)
{
struct task_struct *task;
return 0;
}
-static void cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
+static void cpu_cgroup_attach(struct cgroup *cgrp,
struct cgroup_taskset *tset)
{
struct task_struct *task;
}
static void
-cpu_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp,
- struct cgroup *old_cgrp, struct task_struct *task)
+cpu_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp,
+ struct task_struct *task)
{
/*
* cgroup_exit() is called in the copy_process() failure path.
*/
/* create a new cpu accounting group */
-static struct cgroup_subsys_state *cpuacct_create(
- struct cgroup_subsys *ss, struct cgroup *cgrp)
+static struct cgroup_subsys_state *cpuacct_create(struct cgroup *cgrp)
{
struct cpuacct *ca;
}
/* destroy an existing cpu accounting group */
-static void
-cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+static void cpuacct_destroy(struct cgroup *cgrp)
{
struct cpuacct *ca = cgroup_ca(cgrp);
return mem_cgroup_sockets_init(cont, ss);
};
-static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
- struct cgroup *cont)
+static void kmem_cgroup_destroy(struct cgroup *cont)
{
- mem_cgroup_sockets_destroy(cont, ss);
+ mem_cgroup_sockets_destroy(cont);
}
#else
static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
return 0;
}
-static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
- struct cgroup *cont)
+static void kmem_cgroup_destroy(struct cgroup *cont)
{
}
#endif
}
static struct cgroup_subsys_state * __ref
-mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
+mem_cgroup_create(struct cgroup *cont)
{
struct mem_cgroup *memcg, *parent;
long error = -ENOMEM;
return ERR_PTR(error);
}
/*
 * mem_cgroup_pre_destroy - pre_destroy callback taking only the cgroup.
 * @cont: cgroup that is about to be removed.
 *
 * Looks up the mem_cgroup for @cont with mem_cgroup_from_cont() and
 * returns whatever mem_cgroup_force_empty(memcg, false) returns.
 * NOTE(review): the meaning of the "false" argument is defined by
 * mem_cgroup_force_empty(), which is not visible here.
 *
 * The removed "ss" parameter was not referenced by the body.
 */
-static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
- struct cgroup *cont)
+static int mem_cgroup_pre_destroy(struct cgroup *cont)
{
struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
return mem_cgroup_force_empty(memcg, false);
}
/*
 * mem_cgroup_destroy - destroy callback taking only the cgroup.
 * @cont: cgroup being destroyed.
 *
 * Calls kmem_cgroup_destroy() for @cont first, then calls
 * mem_cgroup_put() on the mem_cgroup obtained from
 * mem_cgroup_from_cont().  The only use of "ss" in the old body was
 * passing it on to kmem_cgroup_destroy(), so it is removed from both the
 * signature and that call.
 */
-static void mem_cgroup_destroy(struct cgroup_subsys *ss,
- struct cgroup *cont)
+static void mem_cgroup_destroy(struct cgroup *cont)
{
struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
- kmem_cgroup_destroy(ss, cont);
+ kmem_cgroup_destroy(cont);
mem_cgroup_put(memcg);
}
mem_cgroup_end_move(from);
}
-static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
- struct cgroup *cgroup,
- struct cgroup_taskset *tset)
+static int mem_cgroup_can_attach(struct cgroup *cgroup,
+ struct cgroup_taskset *tset)
{
struct task_struct *p = cgroup_taskset_first(tset);
int ret = 0;
return ret;
}
/*
 * mem_cgroup_cancel_attach - cancel_attach callback without "ss".
 * @cgroup: destination cgroup of the aborted attach (unused here).
 * @tset: task set of the aborted attach (unused here).
 *
 * Ignores both arguments and simply calls mem_cgroup_clear_mc().
 * NOTE(review): presumably this undoes state set up by
 * mem_cgroup_can_attach(); that body is not fully visible, so confirm.
 */
-static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
- struct cgroup *cgroup,
- struct cgroup_taskset *tset)
+static void mem_cgroup_cancel_attach(struct cgroup *cgroup,
+ struct cgroup_taskset *tset)
{
mem_cgroup_clear_mc();
}
up_read(&mm->mmap_sem);
}
-static void mem_cgroup_move_task(struct cgroup_subsys *ss,
- struct cgroup *cont,
- struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(struct cgroup *cont,
+ struct cgroup_taskset *tset)
{
struct task_struct *p = cgroup_taskset_first(tset);
struct mm_struct *mm = get_task_mm(p);
mem_cgroup_clear_mc();
}
#else /* !CONFIG_MMU */
/*
 * mem_cgroup_can_attach - !CONFIG_MMU stub.
 * @cgroup: destination cgroup (unused).
 * @tset: tasks about to be moved (unused).
 *
 * Always returns 0.  The signature is kept in step with the variant
 * before the #else, so the "ss" argument is dropped here as well.
 */
-static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
- struct cgroup *cgroup,
- struct cgroup_taskset *tset)
+static int mem_cgroup_can_attach(struct cgroup *cgroup,
+ struct cgroup_taskset *tset)
{
return 0;
}
/*
 * mem_cgroup_cancel_attach - !CONFIG_MMU stub.
 * @cgroup: destination cgroup (unused).
 * @tset: tasks whose move was aborted (unused).
 *
 * Empty body.  Only the parameter list changes: "ss" is removed.
 */
-static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
- struct cgroup *cgroup,
- struct cgroup_taskset *tset)
+static void mem_cgroup_cancel_attach(struct cgroup *cgroup,
+ struct cgroup_taskset *tset)
{
}
/*
 * mem_cgroup_move_task - !CONFIG_MMU stub.
 * @cont: destination cgroup (unused).
 * @tset: tasks being moved (unused).
 *
 * Empty body.  Only the parameter list changes: "ss" is removed.
 */
-static void mem_cgroup_move_task(struct cgroup_subsys *ss,
- struct cgroup *cont,
- struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(struct cgroup *cont,
+ struct cgroup_taskset *tset)
{
}
#endif
#include <net/sock.h>
#include <net/netprio_cgroup.h>
/*
 * Forward declarations of this file's cgroup callbacks.
 * cgrp_create() and cgrp_destroy() lose their "ss" argument;
 * cgrp_populate() is an unchanged context line and still takes it.
 * NOTE(review): presumably declared ahead of the net_prio_subsys
 * initializer so it can reference them; that initializer is not fully
 * visible here.
 */
-static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
- struct cgroup *cgrp);
-static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
+static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp);
+static void cgrp_destroy(struct cgroup *cgrp);
static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp);
struct cgroup_subsys net_prio_subsys = {
rtnl_unlock();
}
-static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
- struct cgroup *cgrp)
+static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
{
struct cgroup_netprio_state *cs;
int ret;
return &cs->css;
}
-static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+static void cgrp_destroy(struct cgroup *cgrp)
{
struct cgroup_netprio_state *cs;
struct net_device *dev;
out:
list_for_each_entry_continue_reverse(proto, &proto_list, node)
if (proto->destroy_cgroup)
- proto->destroy_cgroup(cgrp, ss);
+ proto->destroy_cgroup(cgrp);
mutex_unlock(&proto_list_mutex);
return ret;
}
/*
 * mem_cgroup_sockets_destroy - run every protocol's destroy_cgroup hook.
 * @cgrp: cgroup being destroyed.
 *
 * Holds proto_list_mutex while walking proto_list in reverse order and
 * calls ->destroy_cgroup(@cgrp) on each entry that provides the hook;
 * entries with a NULL hook are skipped.
 *
 * The "ss" argument is removed from this function and from the
 * ->destroy_cgroup() hook invocation in the same change.
 */
-void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss)
+void mem_cgroup_sockets_destroy(struct cgroup *cgrp)
{
struct proto *proto;
mutex_lock(&proto_list_mutex);
list_for_each_entry_reverse(proto, &proto_list, node)
if (proto->destroy_cgroup)
- proto->destroy_cgroup(cgrp, ss);
+ proto->destroy_cgroup(cgrp);
mutex_unlock(&proto_list_mutex);
}
#endif
}
EXPORT_SYMBOL(tcp_init_cgroup);
-void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
+void tcp_destroy_cgroup(struct cgroup *cgrp)
{
struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
struct cg_proto *cg_proto;
#include <net/sock.h>
#include <net/cls_cgroup.h>
/*
 * Forward declarations of this file's cgroup callbacks.
 * cgrp_create() and cgrp_destroy() lose their "ss" argument;
 * cgrp_populate() is an unchanged context line and still takes it.
 * NOTE(review): presumably declared ahead of the net_cls_subsys
 * initializer so it can reference them; that initializer is not fully
 * visible here.
 */
-static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
- struct cgroup *cgrp);
-static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
+static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp);
+static void cgrp_destroy(struct cgroup *cgrp);
static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp);
struct cgroup_subsys net_cls_subsys = {
struct cgroup_cls_state, css);
}
-static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
- struct cgroup *cgrp)
+static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp)
{
struct cgroup_cls_state *cs;
return &cs->css;
}
/*
 * cgrp_destroy - free the classifier state attached to @cgrp.
 * @cgrp: cgroup being destroyed.
 *
 * Passes the pointer returned by cgrp_cls_state(@cgrp) to kfree().
 * The removed "ss" parameter was not referenced by the body.
 */
-static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+static void cgrp_destroy(struct cgroup *cgrp)
{
kfree(cgrp_cls_state(cgrp));
}
struct cgroup_subsys devices_subsys;
-static int devcgroup_can_attach(struct cgroup_subsys *ss,
- struct cgroup *new_cgrp, struct cgroup_taskset *set)
+static int devcgroup_can_attach(struct cgroup *new_cgrp,
+ struct cgroup_taskset *set)
{
struct task_struct *task = cgroup_taskset_first(set);
/*
* called from kernel/cgroup.c with cgroup_lock() held.
*/
-static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss,
- struct cgroup *cgroup)
+static struct cgroup_subsys_state *devcgroup_create(struct cgroup *cgroup)
{
struct dev_cgroup *dev_cgroup, *parent_dev_cgroup;
struct cgroup *parent_cgroup;
return &dev_cgroup->css;
}
-static void devcgroup_destroy(struct cgroup_subsys *ss,
- struct cgroup *cgroup)
+static void devcgroup_destroy(struct cgroup *cgroup)
{
struct dev_cgroup *dev_cgroup;
struct dev_whitelist_item *wh, *tmp;