diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index f3b44a65fc7a..cafcd7431189 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -737,6 +737,14 @@ struct cgroup_subsys blkio_subsys = { .subsys_id = blkio_subsys_id, .base_cftypes = blkcg_files, .module = THIS_MODULE, + + /* + * blkio subsystem is utterly broken in terms of hierarchy support. + * It treats all cgroups equally regardless of where they're + * located in the hierarchy - all cgroups are treated as if they're + * right below the root. Fix it and remove the following. + */ + .broken_hierarchy = true, }; EXPORT_SYMBOL_GPL(blkio_subsys); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c90eaa803440..68e8df70487e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -496,6 +496,21 @@ struct cgroup_subsys { */ bool __DEPRECATED_clear_css_refs; + /* + * If %false, this subsystem is properly hierarchical - + * configuration, resource accounting and restriction on a parent + * cgroup cover those of its children. If %true, hierarchy support + * is broken in some ways - some subsystems ignore hierarchy + * completely while others are only implemented half-way. + * + * It's now discouraged to create nested cgroups if the subsystem is + * broken and cgroup core will emit a warning message on such + * cases. Eventually, all subsystems will be made properly + * hierarchical and this will go away. 
+ */ + bool broken_hierarchy; + bool warned_broken_hierarchy; + #define MAX_CGROUP_TYPE_NAMELEN 32 const char *name; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 79818507e444..b7d9606b17d7 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3954,8 +3954,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); for_each_subsys(root, ss) { - struct cgroup_subsys_state *css = ss->create(cgrp); + struct cgroup_subsys_state *css; + css = ss->create(cgrp); if (IS_ERR(css)) { err = PTR_ERR(css); goto err_destroy; @@ -3969,6 +3970,15 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, /* At error, ->destroy() callback has to free assigned ID. */ if (clone_children(parent) && ss->post_clone) ss->post_clone(cgrp); + + if (ss->broken_hierarchy && !ss->warned_broken_hierarchy && + parent->parent) { + pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n", + current->comm, current->pid, ss->name); + if (!strcmp(ss->name, "memory")) + pr_warning("cgroup: \"memory\" requires setting use_hierarchy to 1 on the root.\n"); + ss->warned_broken_hierarchy = true; + } } list_add(&cgrp->sibling, &cgrp->parent->children); diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 3649fc6b3eaa..b1724ce98981 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -373,4 +373,12 @@ struct cgroup_subsys freezer_subsys = { .can_attach = freezer_can_attach, .fork = freezer_fork, .base_cftypes = files, + + /* + * freezer subsys doesn't handle hierarchy at all. Frozen state + * should be inherited through the hierarchy - if a parent is + * frozen, all its children should be frozen. Fix it and remove + * the following. 
+ */ + .broken_hierarchy = true, }; diff --git a/kernel/events/core.c b/kernel/events/core.c index b7935fcec7d9..f18a0a56e5aa 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7285,5 +7285,12 @@ struct cgroup_subsys perf_subsys = { .destroy = perf_cgroup_destroy, .exit = perf_cgroup_exit, .attach = perf_cgroup_attach, + + /* + * perf_event cgroup doesn't handle nesting correctly. + * ctx->nr_cgroups adjustments should be propagated through the + * cgroup hierarchy. Fix it and remove the following. + */ + .broken_hierarchy = true, }; #endif /* CONFIG_CGROUP_PERF */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 795e525afaba..a72f2ffdc3d0 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4973,6 +4973,13 @@ mem_cgroup_create(struct cgroup *cont) } else { res_counter_init(&memcg->res, NULL); res_counter_init(&memcg->memsw, NULL); + /* + * Deeper hierarchy with use_hierarchy == false doesn't make + * much sense so let cgroup subsystem know about this + * unfortunate state in our controller. + */ + if (parent && parent != root_mem_cgroup) + mem_cgroup_subsys.broken_hierarchy = true; } memcg->last_scanned_node = MAX_NUMNODES; INIT_LIST_HEAD(&memcg->oom_notify); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index c75e3f9d060f..34f3615b30ca 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -330,7 +330,17 @@ struct cgroup_subsys net_prio_subsys = { .subsys_id = net_prio_subsys_id, #endif .base_cftypes = ss_files, - .module = THIS_MODULE + .module = THIS_MODULE, + + /* + * net_prio has artificial limit on the number of cgroups and + * disallows nesting making it impossible to co-mount it with other + * hierarchical subsystems. Remove the artificially low PRIOIDX_SZ + * limit and properly nest configuration such that children follow + * their parents' configurations by default and are allowed to + * override and remove the following. 
+ */ + .broken_hierarchy = true, }; static int netprio_device_event(struct notifier_block *unused, diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 7743ea8d1d38..907daf99ab2e 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -82,6 +82,15 @@ struct cgroup_subsys net_cls_subsys = { #endif .base_cftypes = ss_files, .module = THIS_MODULE, + + /* + * While net_cls cgroup has the rudimentary hierarchy support of + * inheriting the parent's classid on cgroup creation, it doesn't + * properly propagate config changes in ancestors to their + * descendants. A child should follow the parent's configuration + * but be allowed to override it. Fix it and remove the following. + */ + .broken_hierarchy = true, }; struct cls_cgroup_head { diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 442204cc22d9..4b877a92a7ea 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -457,6 +457,15 @@ struct cgroup_subsys devices_subsys = { .destroy = devcgroup_destroy, .subsys_id = devices_subsys_id, .base_cftypes = dev_cgroup_files, + + /* + * While devices cgroup has the rudimentary hierarchy support which + * checks the parent's restriction, it doesn't properly propagate + * config changes in ancestors to their descendants. A child + * should only be allowed to add more restrictions to the parent's + * configuration. Fix it and remove the following. + */ + .broken_hierarchy = true, }; int __devcgroup_inode_permission(struct inode *inode, int mask)