sched: Avoid frequent migration of running task

The power cost of a cpu can drop quite considerably when it goes idle.
As a result, the best choice for running a single task in a cluster
can vary quite rapidly. As the task keeps hopping cpus, the cpus it
vacates go idle and start looking like more favorable targets for the
task, which can leave it migrating on almost every scheduler tick!

Prevent this by keeping track of when a task started running on a cpu,
and allow task migration in the tick path (migration_needed()) for
energy-efficiency reasons only if the task has run sufficiently long,
as determined by the sysctl_sched_min_runtime tunable.

Note that the sysctl_sched_min_runtime setting is currently honored
only in the scheduler_tick()->migration_needed() path and not in the
idle_balance() path. In other words, a task could still be pulled to
another cpu by idle_balance(). This limitation should not matter for
the typical high-frequency-migration case (a single high-demand task
running on a high-performance cpu).
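
A minimal sketch of the new gate (run_start and sysctl_sched_min_runtime
are names from this patch; the helper name and surrounding code are
illustrative only, not the literal kernel code):

	static inline bool ran_long_enough(struct task_struct *p)
	{
		u64 delta = sched_clock() - p->run_start;

		return delta >= sysctl_sched_min_runtime;
	}

	/* tick path, before migrating the running task for power reasons */
	if (!ran_long_enough(p))
		return 0;	/* too soon, leave the task where it is */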

CRs-Fixed: 756570
Change-Id: I96413b7a81b623193c3bbcec6f3fa9dfec367d99
Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org>
Author:    Srivatsa Vaddagiri 2014-11-18 13:19:39 +05:30
Committer: Olav Haugan
commit 32c6ac7c62 (parent 3986d38e2c)
6 changed files with 49 additions and 0 deletions


@@ -1250,6 +1250,24 @@ Non-small tasks will prefer to wake up on idle CPUs if this tunable is set to 1.
If the tunable is set to 0, non-small tasks will prefer to wake up on mostly
idle CPUs which are not completely idle, increasing task packing behavior.
** 7.24 sched_min_runtime
Appears at: /proc/sys/kernel/sched_min_runtime
Default value: 200000000 ns (200 ms)
This tunable helps avoid frequent migration of a task on account of
energy awareness. On every scheduler tick, a check is made (in
migration_needed()) whether the running task should be migrated to a
"better" cpu, one that could offer either better performance or lower
power. When deciding to migrate a task for power reasons, we want to
avoid "frequent" migrations (say, every tick), which could add more
overhead than they save. A task's 'run_start' attribute is set when it
starts running on a cpu, and migration_needed() uses it to rate-limit
such migrations: only once a task has been associated with a cpu (in
either running or runnable state) for more than 'sched_min_runtime' ns
is it considered eligible for migration in the tick path on account of
energy awareness.
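
For illustration only (not part of this patch), the tunable can be read
and written from user space like any other sysctl; the 100 ms value
below is just an example and writing requires root:

	#include <stdio.h>

	int main(void)
	{
		unsigned int ns;
		FILE *f = fopen("/proc/sys/kernel/sched_min_runtime", "r");

		if (!f)
			return 1;
		if (fscanf(f, "%u", &ns) != 1) {
			fclose(f);
			return 1;
		}
		fclose(f);
		printf("sched_min_runtime = %u ns (%u ms)\n", ns, ns / 1000000);

		/* Example: lower the threshold to 100 ms (needs root). */
		f = fopen("/proc/sys/kernel/sched_min_runtime", "w");
		if (f) {
			fprintf(f, "%u\n", 100000000u);
			fclose(f);
		}
		return 0;
	}
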
=========================
8. HMP SCHEDULER TRACE POINTS
=========================


@@ -1168,6 +1168,7 @@ struct task_struct {
* of this task
*/
u32 init_load_pct;
u64 run_start;
#endif
#ifdef CONFIG_CGROUP_SCHED
struct task_group *sched_task_group;


@@ -44,6 +44,7 @@ extern unsigned int sysctl_sched_cpu_high_irqload;
extern unsigned int sysctl_sched_freq_account_wait_time;
extern unsigned int sysctl_sched_migration_fixup;
extern unsigned int sysctl_sched_heavy_task_pct;
extern unsigned int sysctl_sched_min_runtime;
#if defined(CONFIG_SCHED_FREQ_INPUT) || defined(CONFIG_SCHED_HMP)
extern unsigned int sysctl_sched_init_task_load_pct;


@@ -2519,6 +2519,16 @@ static void restore_orig_mark_start(struct task_struct *p, u64 mark_start)
p->ravg.mark_start = mark_start;
}
/*
* Note down when task started running on a cpu. This information will be handy
* to avoid "too" frequent task migrations for a running task on account of
* power.
*/
static inline void note_run_start(struct task_struct *p, u64 wallclock)
{
p->run_start = wallclock;
}
#else /* CONFIG_SCHED_HMP */
static inline void fixup_busy_time(struct task_struct *p, int new_cpu) { }
@@ -2550,6 +2560,8 @@ restore_orig_mark_start(struct task_struct *p, u64 mark_start)
{
}
static inline void note_run_start(struct task_struct *p, u64 wallclock) { }
#endif /* CONFIG_SCHED_HMP */
#ifdef CONFIG_SMP
@@ -2581,6 +2593,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
trace_sched_migrate_task(p, new_cpu, pct_task_load(p));
note_run_start(p, -1);
if (task_cpu(p) != new_cpu) {
struct task_migration_notifier tmn;
@@ -4664,6 +4678,7 @@ need_resched:
prev->state = TASK_RUNNING;
} else {
deactivate_task(rq, prev, DEQUEUE_SLEEP);
note_run_start(prev, -1);
prev->on_rq = 0;
/*
@@ -4694,6 +4709,7 @@ need_resched:
update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
clear_tsk_need_resched(prev);
rq->skip_clock_update = 0;
note_run_start(next, wallclock);
BUG_ON(task_cpu(next) != cpu_of(rq));
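
Taken together, the core.c hunks above give run_start a simple
lifecycle; a condensed summary of the new call sites (paraphrasing the
diff, not additional code):

	/*
	 * set_task_cpu(p, new_cpu)            -> note_run_start(p, -1)
	 *     the task is being (re)placed on a cpu; the old timestamp is
	 *     discarded
	 *
	 * __schedule(), deactivate_task(prev) -> note_run_start(prev, -1)
	 *     the task blocks; it gets a fresh timestamp when it runs again
	 *
	 * __schedule(), after picking next    -> note_run_start(next, wallclock)
	 *     the task (re)starts running; wallclock is the reference point
	 *     that sched_clock() - p->run_start is later compared against
	 */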


@@ -1229,6 +1229,8 @@ unsigned int __read_mostly sched_init_task_load_pelt;
unsigned int __read_mostly sched_init_task_load_windows;
unsigned int __read_mostly sysctl_sched_init_task_load_pct = 15;
unsigned int __read_mostly sysctl_sched_min_runtime = 200000000; /* 200 ms */
static inline unsigned int task_load(struct task_struct *p)
{
if (sched_use_pelt)
@@ -2287,6 +2289,10 @@ static int lower_power_cpu_available(struct task_struct *p, int cpu)
int i;
int lowest_power_cpu = task_cpu(p);
int lowest_power = power_cost(p, task_cpu(p));
u64 delta = sched_clock() - p->run_start;
if (delta < sysctl_sched_min_runtime)
return 0;
/* Is a lower-powered idle CPU available which will fit this task? */
for_each_cpu_and(i, tsk_cpus_allowed(p), cpu_online_mask) {
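
A small worked example of what the default threshold means in practice
(HZ=100, i.e. a 10 ms tick, is assumed purely for the arithmetic; it is
not part of the patch):

	#include <stdio.h>

	int main(void)
	{
		unsigned long long min_runtime_ns = 200000000ULL; /* default: 200 ms */
		unsigned long long tick_ns = 10000000ULL;         /* 10 ms tick at HZ=100 */

		/* Ticks a task must spend on its cpu before the power-based
		 * check above can pass: 200000000 / 10000000 = 20. */
		printf("%llu ticks\n", min_runtime_ns / tick_ns);
		return 0;
	}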


@@ -374,6 +374,13 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = sched_hmp_proc_update_handler,
},
{
.procname = "sched_min_runtime",
.data = &sysctl_sched_min_runtime,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "sched_spill_load",
.data = &sysctl_sched_spill_load_pct,
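
One design note on the new entry above: it uses plain proc_dointvec, so
a write is parsed as a signed int and stored into the unsigned variable
without range checking. If bounds were wanted, a clamped variant would
look roughly like this (a sketch only, not part of the patch; 'zero' is
a hypothetical static int):

	static int zero;	/* hypothetical lower bound */

	{
		.procname	= "sched_min_runtime",
		.data		= &sysctl_sched_min_runtime,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,	/* reject negative writes */
	},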