diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6fbbf38555ac..2e69f19369e4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -849,6 +849,9 @@ struct sched_class {
 	void (*task_tick) (struct rq *rq, struct task_struct *p);
 	void (*task_new) (struct rq *rq, struct task_struct *p);
 	void (*set_cpus_allowed)(struct task_struct *p, cpumask_t *newmask);
+
+	void (*join_domain)(struct rq *rq);
+	void (*leave_domain)(struct rq *rq);
 };
 
 struct load_weight {
diff --git a/kernel/sched.c b/kernel/sched.c
index 36bd8ff2a669..34b7d721d735 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -349,6 +349,28 @@ struct rt_rq {
 	int overloaded;
 };
 
+#ifdef CONFIG_SMP
+
+/*
+ * We add the notion of a root-domain which will be used to define per-domain
+ * variables. Each exclusive cpuset essentially defines an island domain by
+ * fully partitioning the member cpus from any other cpuset. Whenever a new
+ * exclusive cpuset is created, we also create and attach a new root-domain
+ * object.
+ *
+ * By default the system creates a single root-domain with all cpus as
+ * members (mimicking the global state we have today).
+ */
+struct root_domain {
+	atomic_t refcount;
+	cpumask_t span;
+	cpumask_t online;
+};
+
+static struct root_domain def_root_domain;
+
+#endif
+
 /*
  * This is the main, per-CPU runqueue data structure.
  *
@@ -406,6 +428,7 @@ struct rq {
 	atomic_t nr_iowait;
 
 #ifdef CONFIG_SMP
+	struct root_domain *rd;
 	struct sched_domain *sd;
 
 	/* For active balancing */
@@ -5550,6 +5573,15 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_ONLINE_FROZEN:
 		/* Strictly unnecessary, as first user will wake it. */
 		wake_up_process(cpu_rq(cpu)->migration_thread);
+
+		/* Update our root-domain */
+		rq = cpu_rq(cpu);
+		spin_lock_irqsave(&rq->lock, flags);
+		if (rq->rd) {
+			BUG_ON(!cpu_isset(cpu, rq->rd->span));
+			cpu_set(cpu, rq->rd->online);
+		}
+		spin_unlock_irqrestore(&rq->lock, flags);
 		break;
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -5600,6 +5632,17 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		}
 		spin_unlock_irq(&rq->lock);
 		break;
+
+	case CPU_DOWN_PREPARE:
+		/* Update our root-domain */
+		rq = cpu_rq(cpu);
+		spin_lock_irqsave(&rq->lock, flags);
+		if (rq->rd) {
+			BUG_ON(!cpu_isset(cpu, rq->rd->span));
+			cpu_clear(cpu, rq->rd->online);
+		}
+		spin_unlock_irqrestore(&rq->lock, flags);
+		break;
 #endif
 	}
 	return NOTIFY_OK;
@@ -5788,11 +5831,69 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 	return 1;
 }
 
+static void rq_attach_root(struct rq *rq, struct root_domain *rd)
+{
+	unsigned long flags;
+	const struct sched_class *class;
+
+	spin_lock_irqsave(&rq->lock, flags);
+
+	if (rq->rd) {
+		struct root_domain *old_rd = rq->rd;
+
+		for (class = sched_class_highest; class; class = class->next)
+			if (class->leave_domain)
+				class->leave_domain(rq);
+
+		if (atomic_dec_and_test(&old_rd->refcount))
+			kfree(old_rd);
+	}
+
+	atomic_inc(&rd->refcount);
+	rq->rd = rd;
+
+	for (class = sched_class_highest; class; class = class->next)
+		if (class->join_domain)
+			class->join_domain(rq);
+
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static void init_rootdomain(struct root_domain *rd, const cpumask_t *map)
+{
+	memset(rd, 0, sizeof(*rd));
+
+	rd->span = *map;
+	cpus_and(rd->online, rd->span, cpu_online_map);
+}
+
+static void init_defrootdomain(void)
+{
+	cpumask_t cpus = CPU_MASK_ALL;
+
+	init_rootdomain(&def_root_domain, &cpus);
+	atomic_set(&def_root_domain.refcount, 1);
+}
+
+static struct root_domain *alloc_rootdomain(const cpumask_t *map)
+{
+	struct root_domain *rd;
+
+	rd = kmalloc(sizeof(*rd), GFP_KERNEL);
+	if (!rd)
+		return NULL;
+
+	init_rootdomain(rd, map);
+
+	return rd;
+}
+
 /*
  * Attach the domain 'sd' to 'cpu' as its base domain. Callers must
  * hold the hotplug lock.
  */
-static void cpu_attach_domain(struct sched_domain *sd, int cpu)
+static void cpu_attach_domain(struct sched_domain *sd,
+			      struct root_domain *rd, int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
 	struct sched_domain *tmp;
@@ -5817,6 +5918,7 @@ static void cpu_attach_domain(struct sched_domain *sd, int cpu)
 
 	sched_domain_debug(sd, cpu);
 
+	rq_attach_root(rq, rd);
 	rcu_assign_pointer(rq->sd, sd);
 }
 
@@ -6185,6 +6287,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 static int build_sched_domains(const cpumask_t *cpu_map)
 {
 	int i;
+	struct root_domain *rd;
 #ifdef CONFIG_NUMA
 	struct sched_group **sched_group_nodes = NULL;
 	int sd_allnodes = 0;
@@ -6201,6 +6304,12 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 	sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
 #endif
 
+	rd = alloc_rootdomain(cpu_map);
+	if (!rd) {
+		printk(KERN_WARNING "Cannot alloc root domain\n");
+		return -ENOMEM;
+	}
+
 	/*
 	 * Set up domains for cpus specified by the cpu_map.
 	 */
@@ -6417,7 +6526,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 #else
 		sd = &per_cpu(phys_domains, i);
 #endif
-		cpu_attach_domain(sd, i);
+		cpu_attach_domain(sd, rd, i);
 	}
 
 	return 0;
@@ -6475,7 +6584,7 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
 	unregister_sched_domain_sysctl();
 
 	for_each_cpu_mask(i, *cpu_map)
-		cpu_attach_domain(NULL, i);
+		cpu_attach_domain(NULL, &def_root_domain, i);
 	synchronize_sched();
 	arch_destroy_sched_domains(cpu_map);
 }
@@ -6727,6 +6836,10 @@ void __init sched_init(void)
 	int highest_cpu = 0;
 	int i, j;
 
+#ifdef CONFIG_SMP
+	init_defrootdomain();
+#endif
+
 	for_each_possible_cpu(i) {
 		struct rt_prio_array *array;
 		struct rq *rq;
@@ -6765,6 +6878,8 @@ void __init sched_init(void)
 			rq->cpu_load[j] = 0;
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
+		rq->rd = NULL;
+		rq_attach_root(rq, &def_root_domain);
 		rq->active_balance = 0;
 		rq->next_balance = jiffies;
 		rq->push_cpu = 0;
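
For reference, below is a minimal userspace sketch (not kernel code) of the reference-counting scheme rq_attach_root() implements in the patch: each runqueue holds one reference on its root_domain, attaching a new one drops the reference on the old one, and the old domain is freed when the last runqueue leaves it. The names mirror the patch, but the rq->lock locking, the sched_class join_domain/leave_domain hooks, and the cpumask bookkeeping are omitted, plain ints stand in for atomic_t, and both domains are heap-allocated here; in the patch itself def_root_domain is static and init_defrootdomain() pins it with an initial refcount of 1 so it is never kfree()d.

#include <stdio.h>
#include <stdlib.h>

struct root_domain {
	int refcount;			/* runqueues currently attached */
};

struct rq {
	struct root_domain *rd;		/* root-domain this runqueue belongs to */
};

static struct root_domain *alloc_rootdomain(void)
{
	return calloc(1, sizeof(struct root_domain));
}

static void rq_attach_root(struct rq *rq, struct root_domain *rd)
{
	if (rq->rd) {
		struct root_domain *old_rd = rq->rd;

		/* last runqueue out frees the old root-domain */
		if (--old_rd->refcount == 0)
			free(old_rd);
	}

	rd->refcount++;
	rq->rd = rd;
}

int main(void)
{
	struct root_domain *def_rd = alloc_rootdomain();
	struct root_domain *new_rd = alloc_rootdomain();
	struct rq rq0 = { NULL }, rq1 = { NULL };

	/* boot: every runqueue joins the default root-domain */
	rq_attach_root(&rq0, def_rd);
	rq_attach_root(&rq1, def_rd);
	printf("def_rd refcount = %d\n", def_rd->refcount);	/* 2 */

	/* repartition: rq1 moves to a freshly allocated root-domain */
	rq_attach_root(&rq1, new_rd);
	printf("def_rd refcount = %d\n", def_rd->refcount);	/* 1 */
	printf("new_rd refcount = %d\n", new_rd->refcount);	/* 1 */

	return 0;
}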