From 599bfc750304243ce5678cb153c652cb9c1e4748 Mon Sep 17 00:00:00 2001 From: Srivatsa Vaddagiri Date: Wed, 17 Dec 2014 14:04:26 +0530 Subject: [PATCH] sched: Per-cpu prefer_idle flag Remove the global sysctl_sched_prefer_idle flag and replace it with a per-cpu prefer_idle flag. The per-cpu flag is expected to be the same for all cpus in a cluster. It thus provides a convenient means to disable packing in one cluster while allowing packing in another cluster. Change-Id: Ie4cc73bb1a55b4eac5697be38e558546161faca1 Signed-off-by: Srivatsa Vaddagiri --- Documentation/scheduler/sched-hmp.txt | 2 +- drivers/base/cpu.c | 44 +++++++++++++++++++++++- include/linux/sched.h | 2 ++ include/linux/sched/sysctl.h | 1 - kernel/sched/core.c | 1 + kernel/sched/fair.c | 48 +++++++++++++++++++++------ kernel/sched/sched.h | 1 + kernel/sysctl.c | 7 ---- 8 files changed, 85 insertions(+), 21 deletions(-) diff --git a/Documentation/scheduler/sched-hmp.txt b/Documentation/scheduler/sched-hmp.txt index 7851cec119d6..cf85ffd7d58a 100644 --- a/Documentation/scheduler/sched-hmp.txt +++ b/Documentation/scheduler/sched-hmp.txt @@ -1247,7 +1247,7 @@ the CPU. 
** 7.23 sched_prefer_idle -Appears at: /proc/sys/kernel/sched_prefer_idle +Appears at: /sys/devices/system/cpu/cpuX/sched_prefer_idle Default value: 1 diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index eb0b4e84c62c..d5963ed5e5b1 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -277,13 +277,52 @@ static ssize_t __ref store_sched_mostly_idle_nr_run(struct device *dev, return err; } +static ssize_t show_sched_prefer_idle(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cpu *cpu = container_of(dev, struct cpu, dev); + ssize_t rc; + int cpunum; + int prefer_idle; + + cpunum = cpu->dev.id; + + prefer_idle = sched_get_cpu_prefer_idle(cpunum); + + rc = snprintf(buf, PAGE_SIZE-2, "%d\n", prefer_idle); + + return rc; +} + +static ssize_t __ref store_sched_prefer_idle(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cpu *cpu = container_of(dev, struct cpu, dev); + int cpuid = cpu->dev.id; + int prefer_idle, err; + + err = kstrtoint(strstrip((char *)buf), 0, &prefer_idle); + if (err) + return err; + + err = sched_set_cpu_prefer_idle(cpuid, prefer_idle); + if (err >= 0) + err = count; + + return err; +} + static DEVICE_ATTR(sched_mostly_idle_freq, 0664, show_sched_mostly_idle_freq, store_sched_mostly_idle_freq); static DEVICE_ATTR(sched_mostly_idle_load, 0664, show_sched_mostly_idle_load, store_sched_mostly_idle_load); static DEVICE_ATTR(sched_mostly_idle_nr_run, 0664, show_sched_mostly_idle_nr_run, store_sched_mostly_idle_nr_run); -#endif +static DEVICE_ATTR(sched_prefer_idle, 0664, + show_sched_prefer_idle, store_sched_prefer_idle); + +#endif /* CONFIG_SCHED_HMP */ /* * Print cpu online, possible, present, and system maps @@ -465,6 +504,9 @@ int __cpuinit register_cpu(struct cpu *cpu, int num) if (!error) error = device_create_file(&cpu->dev, &dev_attr_sched_mostly_idle_freq); + if (!error) + error = device_create_file(&cpu->dev, + &dev_attr_sched_prefer_idle); #endif return error; 
diff --git a/include/linux/sched.h b/include/linux/sched.h index 3e881a85fa2b..8814db1eb853 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1917,6 +1917,8 @@ sched_set_cpu_cstate(int cpu, int cstate, int wakeup_energy, int wakeup_latency) extern int sched_set_boost(int enable); extern int sched_set_init_task_load(struct task_struct *p, int init_load_pct); extern u32 sched_get_init_task_load(struct task_struct *p); +extern int sched_set_cpu_prefer_idle(int cpu, int prefer_idle); +extern int sched_get_cpu_prefer_idle(int cpu); extern int sched_set_cpu_mostly_idle_load(int cpu, int mostly_idle_pct); extern int sched_get_cpu_mostly_idle_load(int cpu); extern int sched_set_cpu_mostly_idle_nr_run(int cpu, int nr_run); diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 431569461197..0905e3026733 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -61,7 +61,6 @@ extern unsigned int sysctl_sched_small_task_pct; extern unsigned int sysctl_sched_upmigrate_pct; extern unsigned int sysctl_sched_downmigrate_pct; extern int sysctl_sched_upmigrate_min_nice; -extern unsigned int sysctl_sched_prefer_idle; extern unsigned int sysctl_sched_powerband_limit_pct; extern unsigned int sysctl_sched_boost; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1f50fad54d57..c2212b7453da 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9021,6 +9021,7 @@ void __init sched_init(void) rq->cur_irqload = 0; rq->avg_irqload = 0; rq->irqload_ts = 0; + rq->prefer_idle = 1; #ifdef CONFIG_SCHED_FREQ_INPUT rq->old_busy_time = 0; rq->curr_runnable_sum = rq->prev_runnable_sum = 0; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bb93ec544e91..1dadcaba103f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1319,13 +1319,6 @@ unsigned int __read_mostly sysctl_sched_downmigrate_pct = 60; */ int __read_mostly sysctl_sched_upmigrate_min_nice = 15; -/* - * Tunable to govern scheduler wakeup 
placement CPU selection - * preference. If set, the scheduler chooses to wake up a task - * on an idle CPU. - */ -unsigned int __read_mostly sysctl_sched_prefer_idle = 1; - /* * Scheduler boost is a mechanism to temporarily place tasks on CPUs * with higher capacity than those where a task would have normally @@ -1384,6 +1377,22 @@ int sched_set_init_task_load(struct task_struct *p, int init_load_pct) return 0; } +int sched_set_cpu_prefer_idle(int cpu, int prefer_idle) +{ + struct rq *rq = cpu_rq(cpu); + + rq->prefer_idle = !!prefer_idle; + + return 0; +} + +int sched_get_cpu_prefer_idle(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + + return rq->prefer_idle; +} + int sched_set_cpu_mostly_idle_load(int cpu, int mostly_idle_pct) { struct rq *rq = cpu_rq(cpu); @@ -1927,20 +1936,24 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, int small_task = is_small_task(p); int boost = sched_boost(); int cstate, min_cstate = INT_MAX; - int prefer_idle = reason ? 1 : sysctl_sched_prefer_idle; + int prefer_idle = -1; int curr_cpu = smp_processor_id(); int prefer_idle_override = 0; + if (reason) { + prefer_idle = 1; + prefer_idle_override = 1; + } + if (wake_to_idle(p)) { prefer_idle = 1; prefer_idle_override = 1; small_task = 0; } - trace_sched_task_load(p, small_task, boost, reason, sync, prefer_idle); - if (small_task && !boost) { best_cpu = best_small_task_cpu(p, sync); + prefer_idle = 0; /* For sched_task_load tracepoint */ goto done; } @@ -1970,6 +1983,10 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, continue; } + /* Set prefer_idle based on the cpu where task will first fit */ + if (prefer_idle == -1) + prefer_idle = cpu_rq(i)->prefer_idle; + if (!eligible_cpu(p, i, sync)) continue; @@ -1996,6 +2013,8 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, min_cstate = INT_MAX; min_cstate_cpu = -1; best_cpu = -1; + if (!prefer_idle_override) + prefer_idle = cpu_rq(i)->prefer_idle; } /* @@ -2051,7 
+2070,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason, } } - if (min_cstate_cpu >= 0 && (prefer_idle || + if (min_cstate_cpu >= 0 && (prefer_idle > 0 || !(best_cpu >= 0 && mostly_idle_cpu_sync(best_cpu, sync)))) best_cpu = min_cstate_cpu; done: @@ -2070,6 +2089,13 @@ done: if (cpu_rq(best_cpu)->mostly_idle_freq && !prefer_idle_override) best_cpu = select_packing_target(p, best_cpu); + /* + * prefer_idle is initialized towards middle of function. Leave this + * tracepoint towards end to capture prefer_idle flag used for this + * instance of wakeup. + */ + trace_sched_task_load(p, small_task, boost, reason, sync, prefer_idle); + return best_cpu; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 9dd6aaf12672..cbbf22b943d1 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -497,6 +497,7 @@ struct rq { int capacity; int max_possible_capacity; u64 window_start; + int prefer_idle; u32 mostly_idle_load; int mostly_idle_nr_run; int mostly_idle_freq; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1acf2e575519..9270bac790c7 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -409,13 +409,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "sched_prefer_idle", - .data = &sysctl_sched_prefer_idle, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "sched_init_task_load", .data = &sysctl_sched_init_task_load_pct,