From f3b577dec1f2ce32d2db6d2ca6badff7002512af Mon Sep 17 00:00:00 2001 From: Daniel J Blueman Date: Tue, 1 Jun 2010 14:06:13 +0100 Subject: [PATCH 1/3] rcu: apply RCU protection to wake_affine() The task_group() function returns a pointer that must be protected by either RCU, the ->alloc_lock, or the cgroup lock (see the rcu_dereference_check() in task_subsys_state(), which is invoked by task_group()). The wake_affine() function currently does none of these, which means that a concurrent update would be within its rights to free the structure returned by task_group(). Because wake_affine() uses this structure only to compute load-balancing heuristics, there is no reason to acquire either of the two locks. Therefore, this commit introduces an RCU read-side critical section that starts before the first call to task_group() and ends after the last use of the "tg" pointer returned from task_group(). Thanks to Li Zefan for pointing out the need to extend the RCU read-side critical section from that proposed by the original patch. Signed-off-by: Daniel J Blueman Signed-off-by: Paul E. McKenney --- kernel/sched_fair.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index eed35eded602..a878b5332daa 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1240,6 +1240,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) * effect of the currently running task from the load * of the current CPU: */ + rcu_read_lock(); if (sync) { tg = task_group(current); weight = current->se.load.weight; @@ -1275,6 +1276,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) balanced = this_eff_load <= prev_eff_load; } else balanced = true; + rcu_read_unlock(); /* * If the currently running task will sleep within From 94bfa3b6692c7a3f6f119596724204ec975d3ef0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 7 Jun 2010 17:09:45 -0700 Subject: [PATCH 2/3] idr: fix RCU lockdep splat in idr_get_next() Convert to rcu_dereference_raw() given that many callers may have many different locking models. Located-by: Miles Lane Tested-by: Miles Lane Signed-off-by: Paul E. McKenney --- lib/idr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/idr.c b/lib/idr.c index c1a206901761..7f1a4f0acf50 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -602,7 +602,7 @@ void *idr_get_next(struct idr *idp, int *nextidp) /* find first ent */ n = idp->layers * IDR_BITS; max = 1 << n; - p = rcu_dereference(idp->top); + p = rcu_dereference_raw(idp->top); if (!p) return NULL; @@ -610,7 +610,7 @@ void *idr_get_next(struct idr *idp, int *nextidp) while (n > 0 && p) { n -= IDR_BITS; *paa++ = p; - p = rcu_dereference(p->ary[(id >> n) & IDR_MASK]); + p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); } if (p) { From 8695159967957015f8dfb49315d6f88e111d90e0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 22 Jun 2010 11:44:53 +0200 Subject: [PATCH 3/3] sched: silence PROVE_RCU in sched_fork() Because cgroup_fork() is ran before sched_fork() [ from copy_process() ] and the child's pid is not yet visible the child is pinned to its cgroup. Therefore we can silence this warning. A nicer solution would be moving cgroup_fork() to right after dup_task_struct() and exclude PF_STARTING from task_subsys_state(). Signed-off-by: Peter Zijlstra Reviewed-by: Li Zefan Signed-off-by: Paul E. McKenney --- kernel/sched.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/sched.c b/kernel/sched.c index f8b8996228dd..a2d215d132f6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2494,7 +2494,16 @@ void sched_fork(struct task_struct *p, int clone_flags) if (p->sched_class->task_fork) p->sched_class->task_fork(p); + /* + * The child is not yet in the pid-hash so no cgroup attach races, + * and the cgroup is pinned to this child due to cgroup_fork() + * is ran before sched_fork(). + * + * Silence PROVE_RCU. + */ + rcu_read_lock(); set_task_cpu(p, cpu); + rcu_read_unlock(); #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) if (likely(sched_info_on()))