sched: Per-cpu prefer_idle flag

Remove the global sysctl_sched_prefer_idle flag and replace it with a
per-cpu prefer_idle flag. The per-cpu flag is expected to be the same
for all cpus in a cluster. It thus provides a convenient means to
disable packing in one cluster while allowing packing in another.

Change-Id: Ie4cc73bb1a55b4eac5697be38e558546161faca1
Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org>
Srivatsa Vaddagiri, 2014-12-17 14:04:26 +05:30
commit 599bfc7503, parent 92ba1d55f3
8 changed files with 85 additions and 21 deletions
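
The commit message describes setting one policy per cluster. As a rough
sketch of that usage (not part of this patch; the cluster_mask parameter
is an assumption), kernel code could apply the flag uniformly across a
cluster with the setter this patch adds:

/* Sketch only: apply a single prefer_idle policy to every CPU in a
 * cluster. cluster_mask is an assumed input describing the cluster;
 * sched_set_cpu_prefer_idle() is the per-cpu setter added below. */
static void set_cluster_prefer_idle(const struct cpumask *cluster_mask,
				    int prefer_idle)
{
	int cpu;

	for_each_cpu(cpu, cluster_mask)
		sched_set_cpu_prefer_idle(cpu, prefer_idle);
}

Setting 1 on a cluster makes wakeups there prefer idle CPUs (no
packing); setting 0 permits packing on that cluster.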

----

@@ -1247,7 +1247,7 @@ the CPU.
 ** 7.23 sched_prefer_idle
 
-Appears at: /proc/sys/kernel/sched_prefer_idle
+Appears at: /sys/devices/system/cpu/cpuX/sched_prefer_idle
 
 Default value: 1
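
In practice the per-cpu node is driven from user space. A minimal
userspace sketch (cpu0 chosen arbitrarily) that clears the flag and
thereby allows packing on that CPU:

/* Userspace sketch: clear prefer_idle on cpu0 through the sysfs node
 * introduced by this patch. Writing 1 would restore the default. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/devices/system/cpu/cpu0/sched_prefer_idle", "w");

	if (!f)
		return 1;
	fputs("0\n", f);
	return fclose(f) ? 1 : 0;
}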

----

@@ -277,13 +277,52 @@ static ssize_t __ref store_sched_mostly_idle_nr_run(struct device *dev,
 	return err;
 }
 
+static ssize_t show_sched_prefer_idle(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	ssize_t rc;
+	int cpunum;
+	int prefer_idle;
+
+	cpunum = cpu->dev.id;
+	prefer_idle = sched_get_cpu_prefer_idle(cpunum);
+
+	rc = snprintf(buf, PAGE_SIZE-2, "%d\n", prefer_idle);
+
+	return rc;
+}
+
+static ssize_t __ref store_sched_prefer_idle(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	int cpuid = cpu->dev.id;
+	int prefer_idle, err;
+
+	err = kstrtoint(strstrip((char *)buf), 0, &prefer_idle);
+	if (err)
+		return err;
+
+	err = sched_set_cpu_prefer_idle(cpuid, prefer_idle);
+	if (err >= 0)
+		err = count;
+
+	return err;
+}
+
 static DEVICE_ATTR(sched_mostly_idle_freq, 0664, show_sched_mostly_idle_freq,
 		   store_sched_mostly_idle_freq);
 static DEVICE_ATTR(sched_mostly_idle_load, 0664, show_sched_mostly_idle_load,
 		   store_sched_mostly_idle_load);
 static DEVICE_ATTR(sched_mostly_idle_nr_run, 0664,
 		show_sched_mostly_idle_nr_run, store_sched_mostly_idle_nr_run);
 #endif
+static DEVICE_ATTR(sched_prefer_idle, 0664,
+		show_sched_prefer_idle, store_sched_prefer_idle);
 #endif /* CONFIG_SCHED_HMP */
 
 /*
  * Print cpu online, possible, present, and system maps
@@ -465,6 +504,9 @@ int __cpuinit register_cpu(struct cpu *cpu, int num)
 	if (!error)
 		error = device_create_file(&cpu->dev,
 					   &dev_attr_sched_mostly_idle_freq);
+	if (!error)
+		error = device_create_file(&cpu->dev,
+					   &dev_attr_sched_prefer_idle);
 #endif
 
 	return error;

----

@@ -1917,6 +1917,8 @@ sched_set_cpu_cstate(int cpu, int cstate, int wakeup_energy, int wakeup_latency)
 extern int sched_set_boost(int enable);
 extern int sched_set_init_task_load(struct task_struct *p, int init_load_pct);
 extern u32 sched_get_init_task_load(struct task_struct *p);
+extern int sched_set_cpu_prefer_idle(int cpu, int prefer_idle);
+extern int sched_get_cpu_prefer_idle(int cpu);
 extern int sched_set_cpu_mostly_idle_load(int cpu, int mostly_idle_pct);
 extern int sched_get_cpu_mostly_idle_load(int cpu);
 extern int sched_set_cpu_mostly_idle_nr_run(int cpu, int nr_run);

----

@@ -61,7 +61,6 @@ extern unsigned int sysctl_sched_small_task_pct;
 extern unsigned int sysctl_sched_upmigrate_pct;
 extern unsigned int sysctl_sched_downmigrate_pct;
 extern int sysctl_sched_upmigrate_min_nice;
-extern unsigned int sysctl_sched_prefer_idle;
 extern unsigned int sysctl_sched_powerband_limit_pct;
 extern unsigned int sysctl_sched_boost;

----

@@ -9021,6 +9021,7 @@ void __init sched_init(void)
 		rq->cur_irqload = 0;
 		rq->avg_irqload = 0;
 		rq->irqload_ts = 0;
+		rq->prefer_idle = 1;
 #ifdef CONFIG_SCHED_FREQ_INPUT
 		rq->old_busy_time = 0;
 		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;

----

@@ -1319,13 +1319,6 @@ unsigned int __read_mostly sysctl_sched_downmigrate_pct = 60;
  */
 int __read_mostly sysctl_sched_upmigrate_min_nice = 15;
 
-/*
- * Tunable to govern scheduler wakeup placement CPU selection
- * preference. If set, the scheduler chooses to wake up a task
- * on an idle CPU.
- */
-unsigned int __read_mostly sysctl_sched_prefer_idle = 1;
-
 /*
  * Scheduler boost is a mechanism to temporarily place tasks on CPUs
  * with higher capacity than those where a task would have normally
@@ -1384,6 +1377,22 @@ int sched_set_init_task_load(struct task_struct *p, int init_load_pct)
 	return 0;
 }
 
+int sched_set_cpu_prefer_idle(int cpu, int prefer_idle)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	rq->prefer_idle = !!prefer_idle;
+
+	return 0;
+}
+
+int sched_get_cpu_prefer_idle(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	return rq->prefer_idle;
+}
+
 int sched_set_cpu_mostly_idle_load(int cpu, int mostly_idle_pct)
 {
 	struct rq *rq = cpu_rq(cpu);
@@ -1927,20 +1936,24 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 	int small_task = is_small_task(p);
 	int boost = sched_boost();
 	int cstate, min_cstate = INT_MAX;
-	int prefer_idle = reason ? 1 : sysctl_sched_prefer_idle;
+	int prefer_idle = -1;
 	int curr_cpu = smp_processor_id();
+	int prefer_idle_override = 0;
+
+	if (reason) {
+		prefer_idle = 1;
+		prefer_idle_override = 1;
+	}
 
 	if (wake_to_idle(p)) {
 		prefer_idle = 1;
+		prefer_idle_override = 1;
 		small_task = 0;
 	}
 
-	trace_sched_task_load(p, small_task, boost, reason, sync, prefer_idle);
-
 	if (small_task && !boost) {
 		best_cpu = best_small_task_cpu(p, sync);
+		prefer_idle = 0; /* For sched_task_load tracepoint */
 		goto done;
 	}
@@ -1970,6 +1983,10 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 			continue;
 		}
 
+		/* Set prefer_idle based on the cpu where task will first fit */
+		if (prefer_idle == -1)
+			prefer_idle = cpu_rq(i)->prefer_idle;
+
 		if (!eligible_cpu(p, i, sync))
 			continue;
@@ -1996,6 +2013,8 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 			min_cstate = INT_MAX;
 			min_cstate_cpu = -1;
 			best_cpu = -1;
+			if (!prefer_idle_override)
+				prefer_idle = cpu_rq(i)->prefer_idle;
 		}
 
 		/*
@@ -2051,7 +2070,7 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
 		}
 	}
 
-	if (min_cstate_cpu >= 0 && (prefer_idle ||
+	if (min_cstate_cpu >= 0 && (prefer_idle > 0 ||
 	    !(best_cpu >= 0 && mostly_idle_cpu_sync(best_cpu, sync))))
 		best_cpu = min_cstate_cpu;
 
 done:
@@ -2070,6 +2089,13 @@ done:
 	if (cpu_rq(best_cpu)->mostly_idle_freq && !prefer_idle_override)
 		best_cpu = select_packing_target(p, best_cpu);
 
+	/*
+	 * prefer_idle is initialized towards middle of function. Leave this
+	 * tracepoint towards end to capture prefer_idle flag used for this
+	 * instance of wakeup.
+	 */
+	trace_sched_task_load(p, small_task, boost, reason, sync, prefer_idle);
+
 	return best_cpu;
 }
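
Taken together, the select_best_cpu() changes resolve prefer_idle in a
fixed order: a migration reason or a wake-to-idle hint pins it to 1 via
prefer_idle_override; otherwise it starts out undecided (-1) and is
taken from the per-cpu flag of the first CPU the task fits on, then
refreshed from the current candidate whenever the search state resets.
A distilled model of that order (a sketch with an assumed candidate
mask, not the kernel code itself):

/* Model of the resolution order only; candidates is an assumed
 * cpumask of CPUs the task fits on, scanned in search order. */
static int resolve_prefer_idle(int override, const struct cpumask *candidates)
{
	int prefer_idle = -1;		/* -1: not yet decided */
	int cpu;

	if (override)
		return 1;		/* reason/wake_to_idle wins */

	for_each_cpu(cpu, candidates) {
		if (prefer_idle == -1)
			prefer_idle = cpu_rq(cpu)->prefer_idle;
	}

	return prefer_idle;		/* stays -1 if nothing fit */
}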

----

@@ -497,6 +497,7 @@ struct rq {
 	int capacity;
 	int max_possible_capacity;
 	u64 window_start;
+	int prefer_idle;
 	u32 mostly_idle_load;
 	int mostly_idle_nr_run;
 	int mostly_idle_freq;

----

@@ -409,13 +409,6 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "sched_prefer_idle",
-		.data		= &sysctl_sched_prefer_idle,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{
 		.procname	= "sched_init_task_load",
 		.data		= &sysctl_sched_init_task_load_pct,