sched: per-cpu mostly_idle threshold
sched_mostly_idle_load and sched_mostly_idle_nr_run knobs help pack
tasks on cpus to some extent. In some cases, it may be desirable to
have different packing limits for different cpus. For example, pack to
a higher limit on high-performance cpus than on power-efficient cpus.
This patch removes the global mostly_idle tunables and makes them
per-cpu, thus letting task packing behavior be controlled in a
fine-grained manner.

Change-Id: Ifc254cda34b928eae9d6c342ce4c0f64e531e6c2
Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org>
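For illustration, a minimal userspace sketch (not part of this patch) that applies asymmetric packing limits through the new per-cpu attributes. The CPU numbers and limit values are assumptions for a hypothetical big.LITTLE layout:

/* Hypothetical helper: give a high-performance cpu (here cpu4) a more
 * aggressive packing limit than a power-efficient one (cpu0). The cpu
 * numbering and the chosen values are illustrative only.
 */
#include <stdio.h>

static int write_cpu_attr(int cpu, const char *attr, int val)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%d/%s", cpu, attr);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%d\n", val);
	return fclose(f);
}

int main(void)
{
	/* Pack up to 5 tasks / 40% load on the fast cpu... */
	write_cpu_attr(4, "sched_mostly_idle_nr_run", 5);
	write_cpu_attr(4, "sched_mostly_idle_load", 40);
	/* ...but keep the power-efficient cpu lightly packed. */
	write_cpu_attr(0, "sched_mostly_idle_nr_run", 2);
	write_cpu_attr(0, "sched_mostly_idle_load", 20);
	return 0;
}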
@@ -564,23 +564,22 @@ both tasks and CPUs to aid in the placement of tasks.
 
 The "mostly_idle" classification applies to CPUs. This
 classification attempts to answer the following question: if a task
-is put on this CPU, is it likely to be able to run soon? One
-possible way to answer this question would be to just check whether
-the CPU is idle or not. That may be too conservative however. The
-CPU may be currently executing a very small task and could become
-idle soon. Since the scheduler is tracking the demand of each task
-it can make an educated guess as to whether a CPU will become idle
-in the near future.
+is put on this CPU, is it likely to be able to run with low contention for
+bandwidth? One possible way to answer this question would be to just check
+whether the CPU is idle or not. That may be too conservative however. The CPU
+may be currently executing a very small task and could become idle soon. Since
+the scheduler is tracking the demand of each task it can make an educated
+guess as to whether a CPU will become idle in the near future.
 
 There are two tunable parameters which are used to determine whether
 a CPU is mostly idle:
 
-/proc/sys/kernel/sched_mostly_idle_nr_run
-/proc/sys/kernel/sched_mostly_idle_load
+/sys/devices/system/cpu/cpuX/sched_mostly_idle_nr_run
+/sys/devices/system/cpu/cpuX/sched_mostly_idle_load
 
-If a CPU does not have more than sched_mostly_idle_nr_run runnable
-tasks and is not more than sched_mostly_idle_load percent busy, it
-is considered mostly idle.
+Note that these tunables are per-cpu. If a CPU does not have more than
+sched_mostly_idle_nr_run runnable tasks and is not more than
+sched_mostly_idle_load percent busy, it is considered mostly idle.
 
 - spill threshold
 
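As a worked example of the criterion documented above (a standalone sketch, not kernel code): with the defaults of nr_run = 3 and load = 20, a CPU running two tasks at 15% busy is mostly idle, while the same CPU at 30% busy is not. The kernel compares an absolute load figure rather than a percentage, but the logic is the same:

#include <stdio.h>

/* Model of the mostly-idle test; limits and samples are illustrative. */
static int mostly_idle(int nr_running, int busy_pct,
		       int nr_run_limit, int load_limit_pct)
{
	return nr_running <= nr_run_limit && busy_pct <= load_limit_pct;
}

int main(void)
{
	printf("%d\n", mostly_idle(2, 15, 3, 20));	/* 1: mostly idle */
	printf("%d\n", mostly_idle(2, 30, 3, 20));	/* 0: too busy */
	return 0;
}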
@@ -1042,9 +1041,9 @@ IRQ_UPDATE
 
 *** 7.1 sched_mostly_idle_nr_run
 
-Appears at: /proc/sys/kernel/sched_mostly_idle_nr_run
+Appears at: /sys/devices/system/cpu/cpuX/sched_mostly_idle_nr_run
 
-Default value: 4
+Default value: 3
 
 If a CPU has this many runnable tasks (or less), it is considered
 "mostly idle." A mostly idle CPU is a preferred destination for a
@@ -1054,7 +1053,7 @@ than sched_mostly_idle_load percent busy.
 
 *** 7.2 sched_mostly_idle_load
 
-Appears at: /proc/sys/kernel/sched_mostly_idle_load
+Appears at: /sys/devices/system/cpu/cpuX/sched_mostly_idle_load
 
 Default value: 20
 
@@ -168,6 +168,85 @@ static ssize_t show_crash_notes_size(struct device *dev,
 static DEVICE_ATTR(crash_notes_size, 0400, show_crash_notes_size, NULL);
 #endif
 
+#ifdef CONFIG_SCHED_HMP
+static ssize_t show_sched_mostly_idle_load(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	ssize_t rc;
+	int cpunum;
+	int mostly_idle_pct;
+
+	cpunum = cpu->dev.id;
+
+	mostly_idle_pct = sched_get_cpu_mostly_idle_load(cpunum);
+
+	rc = snprintf(buf, PAGE_SIZE-2, "%d\n", mostly_idle_pct);
+
+	return rc;
+}
+
+static ssize_t __ref store_sched_mostly_idle_load(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	int cpuid = cpu->dev.id;
+	int mostly_idle_load, err;
+
+	err = kstrtoint(strstrip((char *)buf), 0, &mostly_idle_load);
+	if (err)
+		return err;
+
+	err = sched_set_cpu_mostly_idle_load(cpuid, mostly_idle_load);
+	if (err >= 0)
+		err = count;
+
+	return err;
+}
+
+static ssize_t show_sched_mostly_idle_nr_run(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	ssize_t rc;
+	int cpunum;
+	int mostly_idle_nr_run;
+
+	cpunum = cpu->dev.id;
+
+	mostly_idle_nr_run = sched_get_cpu_mostly_idle_nr_run(cpunum);
+
+	rc = snprintf(buf, PAGE_SIZE-2, "%d\n", mostly_idle_nr_run);
+
+	return rc;
+}
+
+static ssize_t __ref store_sched_mostly_idle_nr_run(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct cpu *cpu = container_of(dev, struct cpu, dev);
+	int cpuid = cpu->dev.id;
+	int mostly_idle_nr_run, err;
+
+	err = kstrtoint(strstrip((char *)buf), 0, &mostly_idle_nr_run);
+	if (err)
+		return err;
+
+	err = sched_set_cpu_mostly_idle_nr_run(cpuid, mostly_idle_nr_run);
+	if (err >= 0)
+		err = count;
+
+	return err;
+}
+
+static DEVICE_ATTR(sched_mostly_idle_load, 0664, show_sched_mostly_idle_load,
+		store_sched_mostly_idle_load);
+static DEVICE_ATTR(sched_mostly_idle_nr_run, 0664,
+		show_sched_mostly_idle_nr_run, store_sched_mostly_idle_nr_run);
+#endif
+
 /*
  * Print cpu online, possible, present, and system maps
  */
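Note on the hunk above: both attributes are created with mode 0664, so the per-cpu packing limits remain runtime-tunable from userspace, much like the global /proc tunables they replace.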
@@ -337,6 +416,16 @@ int __cpuinit register_cpu(struct cpu *cpu, int num)
 		error = device_create_file(&cpu->dev,
 					   &dev_attr_crash_notes_size);
 #endif
+
+#ifdef CONFIG_SCHED_HMP
+	if (!error)
+		error = device_create_file(&cpu->dev,
+					   &dev_attr_sched_mostly_idle_load);
+	if (!error)
+		error = device_create_file(&cpu->dev,
+					   &dev_attr_sched_mostly_idle_nr_run);
+#endif
+
 	return error;
 }
 
@@ -1913,9 +1913,15 @@ sched_set_cpu_cstate(int cpu, int cstate, int wakeup_energy, int wakeup_latency)
 #endif
 
 #ifdef CONFIG_SCHED_HMP
+
 extern int sched_set_boost(int enable);
 extern int sched_set_init_task_load(struct task_struct *p, int init_load_pct);
 extern u32 sched_get_init_task_load(struct task_struct *p);
+extern int sched_set_cpu_mostly_idle_load(int cpu, int mostly_idle_pct);
+extern int sched_get_cpu_mostly_idle_load(int cpu);
+extern int sched_set_cpu_mostly_idle_nr_run(int cpu, int nr_run);
+extern int sched_get_cpu_mostly_idle_nr_run(int cpu);
+
 #else
 static inline int sched_set_boost(int enable)
 {
@@ -55,9 +55,7 @@ extern int sysctl_sched_freq_dec_notify;
 
 #ifdef CONFIG_SCHED_HMP
 extern unsigned int sysctl_sched_spill_nr_run;
-extern unsigned int sysctl_sched_mostly_idle_nr_run;
 extern unsigned int sysctl_sched_spill_load_pct;
-extern unsigned int sysctl_sched_mostly_idle_load_pct;
 extern unsigned int sysctl_sched_small_task_pct;
 extern unsigned int sysctl_sched_upmigrate_pct;
 extern unsigned int sysctl_sched_downmigrate_pct;
@@ -2026,6 +2026,7 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
 	u64 start_ts = sched_clock();
 	int reason = WINDOW_CHANGE;
 	unsigned int old = 0, new = 0;
+	unsigned int old_window_size = sched_ravg_window;
 
 	disable_window_stats();
 
@@ -2048,8 +2049,13 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
 	for_each_possible_cpu(cpu) {
 		struct rq *rq = cpu_rq(cpu);
 
-		if (window_start)
+		if (window_start) {
+			u32 mostly_idle_load = rq->mostly_idle_load;
+
 			rq->window_start = window_start;
+			rq->mostly_idle_load = div64_u64((u64)mostly_idle_load *
+				(u64)sched_ravg_window, (u64)old_window_size);
+		}
 #ifdef CONFIG_SCHED_FREQ_INPUT
 		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
 #endif
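The block above rescales each runqueue's absolute threshold by new_window / old_window when the sampling window changes, which appears intended to keep the configured percentage constant across window sizes. A standalone sketch of the arithmetic with made-up numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t old_window = 10000000;		/* 10 ms, in ns (assumed) */
	uint64_t new_window = 20000000;		/* 20 ms, in ns (assumed) */
	uint64_t mostly_idle_load = 2000000;	/* 20% of the old window */

	/* Same proportion of the new window: still 20%. */
	uint64_t rescaled = mostly_idle_load * new_window / old_window;
	printf("%llu\n", (unsigned long long)rescaled);	/* 4000000 */
	return 0;
}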
@@ -8984,6 +8990,8 @@ void __init sched_init(void)
 		rq->window_start = 0;
 		rq->nr_small_tasks = rq->nr_big_tasks = 0;
 		rq->hmp_flags = 0;
+		rq->mostly_idle_load = pct_to_real(20);
+		rq->mostly_idle_nr_run = 3;
 #ifdef CONFIG_SCHED_FREQ_INPUT
 		rq->old_busy_time = 0;
 		rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
@@ -306,6 +306,8 @@ do { \
 	P(cpu_power);
 #endif
 #ifdef CONFIG_SCHED_HMP
+	P(mostly_idle_load);
+	P(mostly_idle_nr_run);
 	P(load_scale_factor);
 	P(capacity);
 	P(max_possible_capacity);
@@ -398,7 +400,6 @@ static void sched_debug_header(struct seq_file *m)
 	P(sysctl_sched_child_runs_first);
 	P(sysctl_sched_features);
 #ifdef CONFIG_SCHED_HMP
-	P(sched_mostly_idle_load);
 	P(sched_small_task);
 	P(sched_upmigrate);
 	P(sched_downmigrate);
@@ -1255,14 +1255,6 @@ unsigned int __read_mostly sched_enable_hmp = 0;
  */
 unsigned int __read_mostly sysctl_sched_spill_nr_run = 10;
 
-/*
- * A cpu is considered practically idle, if:
- *
- *	rq->nr_running <= sysctl_sched_mostly_idle_nr_run &&
- *	rq->cumulative_runnable_avg <= sched_mostly_idle_load
- */
-unsigned int __read_mostly sysctl_sched_mostly_idle_nr_run = 3;
-
 /*
  * Control whether or not individual CPU power consumption is used to
  * guide task placement.
@@ -1276,16 +1268,6 @@ unsigned int __read_mostly sched_enable_power_aware = 0;
  */
 unsigned int __read_mostly sysctl_sched_powerband_limit_pct = 20;
 
-/*
- * Conversion of *_pct to absolute form is based on max_task_load().
- *
- * For example:
- *	sched_mostly_idle_load =
- *	(sysctl_sched_mostly_idle_load_pct * max_task_load()) / 100;
- */
-unsigned int __read_mostly sched_mostly_idle_load;
-unsigned int __read_mostly sysctl_sched_mostly_idle_load_pct = 20;
-
 /*
  * CPUs with load greater than the sched_spill_load_threshold are not
  * eligible for task placement. When all CPUs in a cluster achieve a
@@ -1352,17 +1334,11 @@ static inline int available_cpu_capacity(int cpu)
 	return rq->capacity;
 }
 
-#define pct_to_real(tunable)	\
-		(div64_u64((u64)tunable * (u64)max_task_load(), 100))
-
 void set_hmp_defaults(void)
 {
 	sched_spill_load =
 		pct_to_real(sysctl_sched_spill_load_pct);
 
-	sched_mostly_idle_load =
-		pct_to_real(sysctl_sched_mostly_idle_load_pct);
-
 	sched_small_task =
 		pct_to_real(sysctl_sched_small_task_pct);
 
@@ -1401,6 +1377,44 @@ int sched_set_init_task_load(struct task_struct *p, int init_load_pct)
 	return 0;
 }
 
+int sched_set_cpu_mostly_idle_load(int cpu, int mostly_idle_pct)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	if (mostly_idle_pct < 0 || mostly_idle_pct > 100)
+		return -EINVAL;
+
+	rq->mostly_idle_load = pct_to_real(mostly_idle_pct);
+
+	return 0;
+}
+
+int sched_get_cpu_mostly_idle_load(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	int mostly_idle_pct;
+
+	mostly_idle_pct = real_to_pct(rq->mostly_idle_load);
+
+	return mostly_idle_pct;
+}
+
+int sched_set_cpu_mostly_idle_nr_run(int cpu, int nr_run)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	rq->mostly_idle_nr_run = nr_run;
+
+	return 0;
+}
+
+int sched_get_cpu_mostly_idle_nr_run(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	return rq->mostly_idle_nr_run;
+}
+
 /*
  * 'load' is in reference to "best cpu" at its best frequency.
  * Scale that in reference to a given cpu, accounting for how bad it is
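One observable consequence of the 0..100 check in sched_set_cpu_mostly_idle_load() above is that an out-of-range write to the new sysfs knob fails. A userspace sketch (the path comes from this patch; error handling is simplified):

#include <errno.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *path =
		"/sys/devices/system/cpu/cpu0/sched_mostly_idle_load";
	FILE *f = fopen(path, "w");

	if (!f)
		return 1;
	fprintf(f, "150\n");	/* > 100, so the kernel rejects it */
	if (fclose(f))		/* the flush surfaces -EINVAL */
		printf("write failed: %s\n", strerror(errno));
	return 0;
}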
@@ -1462,9 +1476,12 @@ spill_threshold_crossed(struct task_struct *p, struct rq *rq, int cpu)
 int mostly_idle_cpu(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
+	int mostly_idle;
 
-	return (cpu_load(cpu) <= sched_mostly_idle_load
-		&& rq->nr_running <= sysctl_sched_mostly_idle_nr_run);
+	mostly_idle = (cpu_load(cpu) <= rq->mostly_idle_load
+		&& rq->nr_running <= rq->mostly_idle_nr_run);
+
+	return mostly_idle;
 }
 
 static int boost_refcount;
@@ -1988,10 +2005,9 @@ int sched_hmp_proc_update_handler(struct ctl_table *table, int write,
 		return ret;
 
 	if ((sysctl_sched_downmigrate_pct > sysctl_sched_upmigrate_pct) ||
-				(sysctl_sched_mostly_idle_load_pct >
-				sysctl_sched_spill_load_pct) || *data > 100) {
-		*data = old_val;
-		return -EINVAL;
+				*data > 100) {
+		*data = old_val;
+		return -EINVAL;
 	}
 
 	/*
@@ -7138,8 +7154,8 @@ static inline int _nohz_kick_needed_hmp(struct rq *rq, int cpu, int *type)
 	int i;
 
 	if (rq->nr_running >= 2 && (rq->nr_running - rq->nr_small_tasks >= 2 ||
-	     rq->nr_running > sysctl_sched_mostly_idle_nr_run ||
-	     cpu_load(cpu) > sched_mostly_idle_load)) {
+	     rq->nr_running > rq->mostly_idle_nr_run ||
+	     cpu_load(cpu) > rq->mostly_idle_load)) {
 
 		if (rq->capacity == max_capacity)
 			return 1;
@@ -497,6 +497,8 @@ struct rq {
 	int capacity;
 	int max_possible_capacity;
 	u64 window_start;
+	u32 mostly_idle_load;
+	int mostly_idle_nr_run;
 
 #ifdef CONFIG_SCHED_FREQ_INPUT
 	unsigned int old_busy_time;
@@ -749,6 +751,12 @@ dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p)
 	BUG_ON((s64)rq->cumulative_runnable_avg < 0);
 }
 
+#define pct_to_real(tunable)	\
+		(div64_u64((u64)tunable * (u64)max_task_load(), 100))
+
+#define real_to_pct(tunable)	\
+		(div64_u64((u64)tunable * (u64)100, (u64)max_task_load()))
+
 #else	/* CONFIG_SCHED_HMP */
 
 static inline int pct_task_load(struct task_struct *p) { return 0; }
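To make the conversion concrete, a standalone model of the two macros above; max_task_load() is stubbed with an assumed value, since the real one depends on the window size and load scaling of the running kernel:

#include <stdint.h>
#include <stdio.h>

static uint64_t max_task_load(void) { return 10000000; }	/* assumed */

#define pct_to_real(t)	((uint64_t)(t) * max_task_load() / 100)
#define real_to_pct(t)	((uint64_t)(t) * 100 / max_task_load())

int main(void)
{
	uint64_t real = pct_to_real(20);

	/* Round trip: 20% -> 2000000 -> 20% */
	printf("%llu -> %llu%%\n", (unsigned long long)real,
	       (unsigned long long)real_to_pct(real));
	return 0;
}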
@@ -367,20 +367,6 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= sched_hmp_proc_update_handler,
 	},
-	{
-		.procname	= "sched_mostly_idle_load",
-		.data		= &sysctl_sched_mostly_idle_load_pct,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sched_hmp_proc_update_handler,
-	},
-	{
-		.procname	= "sched_mostly_idle_nr_run",
-		.data		= &sysctl_sched_mostly_idle_nr_run,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{
 		.procname	= "sched_spill_load",
 		.data		= &sysctl_sched_spill_load_pct,