diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b6b26faf1740..d0def7fc2848 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -91,6 +91,14 @@ static DEFINE_MUTEX(cgroup_mutex); static DEFINE_MUTEX(cgroup_root_mutex); +/* + * cgroup destruction makes heavy use of work items and there can be a lot + * of concurrent destructions. Use a separate workqueue so that cgroup + * destruction work items don't end up filling up max_active of system_wq + * which may lead to deadlock. + */ +static struct workqueue_struct *cgroup_destroy_wq; + /* * Generate an array of cgroup subsystem pointers. At boot time, this is * populated with the built in subsystems, and modular subsystems are @@ -873,7 +881,7 @@ static void cgroup_free_rcu(struct rcu_head *head) { struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head); - schedule_work(&cgrp->free_work); + queue_work(cgroup_destroy_wq, &cgrp->free_work); } static void cgroup_diput(struct dentry *dentry, struct inode *inode) @@ -4686,6 +4694,22 @@ out: return err; } +static int __init cgroup_wq_init(void) +{ + /* + * There isn't much point in executing destruction path in + * parallel. Good chunk is serialized with cgroup_mutex anyway. + * Use 1 for @max_active. + * + * We would prefer to do this in cgroup_init() above, but that + * is called before init_workqueues(): so leave this until after. + */ + cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); + BUG_ON(!cgroup_destroy_wq); + return 0; +} +core_initcall(cgroup_wq_init); + /* * proc_cgroup_show() * - Print task's cgroup paths into seq_file, one line for each hierarchy @@ -4996,7 +5020,7 @@ void __css_put(struct cgroup_subsys_state *css) v = css_unbias_refcnt(atomic_dec_return(&css->refcnt)); if (v == 0) - schedule_work(&css->dput_work); + queue_work(cgroup_destroy_wq, &css->dput_work); } EXPORT_SYMBOL_GPL(__css_put);