sched: Use only partial wait time as task demand

The scheduler currently either considers a task's entire wait time as
task demand or ignores wait time completely, depending on the tunable
sched_account_wait_time. Both approaches have their limitations,
however. The former artificially boosts a task's demand when the boost
may not actually be justified. With the latter, the scheduler runs the
risk of never being able to recognize true load (consider two CPU hogs
on a single little CPU). To achieve a compromise between these two
extremes, change the load tracking algorithm to consider only part of
a task's wait time as its demand. The portion of wait time accounted
as demand is determined by each task's percent load, e.g. for a task
that waits for 10 ms and has 60% task load, only 6 ms of the wait will
contribute to task demand. This approach is fairer, as the scheduler
now tries to determine how much of its wait time a task would actually
have spent running on the CPU had it been executing. It ensures that
tasks with high demand continue to see most of the benefits of
accounting wait time as busy time, while lower-demand tasks don't
experience a disproportionately high boost to demand that would
trigger unjustified big-CPU usage. Note that this new approach applies
only to wait time considered as task demand, not to wait time
considered as CPU busy time.

To achieve the above effect, ensure that any time a task is waiting,
its runtime in every relevant window segment is appropriately adjusted
using its percent load.
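
As a rough illustration of the arithmetic (a self-contained userspace
sketch, not the kernel change itself; demand_from_wait() is a
hypothetical helper and the kernel scales by task_load()/max_task_load()
rather than a raw percentage):

#include <assert.h>
#include <stdint.h>

/*
 * Illustration only: the portion of a wait interval counted as task
 * demand under this patch, given the task's percent load.
 */
static uint64_t demand_from_wait(uint64_t wait_ns, unsigned int load_pct)
{
	return wait_ns * load_pct / 100;
}

int main(void)
{
	/* A 10 ms wait at 60% task load contributes 6 ms of demand. */
	assert(demand_from_wait(10000000, 60) == 6000000);
	return 0;
}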

Change-Id: I6a698d6cb1adeca49113c3499029b422daf7871f
Signed-off-by: Syed Rameez Mustafa <rameezmustafa@codeaurora.org>
Author: Syed Rameez Mustafa
Date: 2015-02-27 16:12:01 -08:00
parent a81bfe20e4
commit 14fd2e5918
4 changed files with 38 additions and 12 deletions

@@ -1332,8 +1332,17 @@ Appears at: /proc/sys/kernel/sched_account_wait_time
Default value: 1
This controls whether a task's wait time is accounted as its demand for cpu
and thus the values found in its sum, sum_history[] and demand attributes.
The load tracking algorithm only considers part of a task's wait time as
its demand. The portion of wait time accounted as demand is determined by
each task's percent load, e.g. for a task that waits for 10 ms and has 60%
task load, only 6 ms of the wait will contribute to task demand. This
approach is fair, as the scheduler tries to determine how much of its wait
time a task would actually have spent running on the CPU had it been
executing. It ensures that tasks with high demand continue to see most of
the benefits of accounting wait time as busy time, while lower-demand
tasks don't experience a disproportionately high boost to demand.
*** 7.16 sched_freq_account_wait_time

@@ -1615,19 +1615,23 @@ static inline void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
static int account_busy_for_task_demand(struct task_struct *p, int event)
{
	/* No need to bother updating task demand for exiting tasks
	 * or the idle task. */
	if (exiting_task(p) || is_idle_task(p))
		return 0;

	/*
	 * When a task is waking up it is completing a segment of non-busy
	 * time. Likewise, if wait time is not treated as busy time, then
	 * when a task begins to run or is migrated, it is not running and
	 * is completing a segment of non-busy time.
	 */
	if (event == TASK_WAKE || (!sched_account_wait_time &&
	     (event == PICK_NEXT_TASK || event == TASK_MIGRATE)))
		return 0;

	/*
	 * We are left with TASK_UPDATE, IRQ_UPDATE, PUT_PREV_TASK and
	 * wait time being accounted as busy time.
	 */
	return 1;
}
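
For reference, the resulting classification can be exercised outside the
kernel. The sketch below mirrors the logic with stand-in enum values and
a plain int for the sched_account_wait_time tunable (illustrative only):

#include <stdio.h>

/* Stand-ins for the scheduler's event enum; values are illustrative. */
enum task_event { TASK_WAKE, PICK_NEXT_TASK, TASK_MIGRATE,
		  TASK_UPDATE, IRQ_UPDATE, PUT_PREV_TASK };

static int counts_as_busy(enum task_event event, int account_wait_time)
{
	/* Waking up completes a segment of non-busy time. */
	if (event == TASK_WAKE)
		return 0;
	/* With the tunable off, wait segments are non-busy too. */
	if (!account_wait_time &&
	    (event == PICK_NEXT_TASK || event == TASK_MIGRATE))
		return 0;
	/* TASK_UPDATE, IRQ_UPDATE, PUT_PREV_TASK, or wait-as-busy. */
	return 1;
}

int main(void)
{
	printf("%d %d\n", counts_as_busy(TASK_WAKE, 1),
	       counts_as_busy(PICK_NEXT_TASK, 1));	/* prints "0 1" */
	return 0;
}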
@@ -1699,6 +1703,15 @@ static void add_to_task_demand(struct rq *rq, struct task_struct *p,
		p->ravg.sum = sched_ravg_window;
}

static u64 wait_adjust(struct task_struct *p, u64 delta, int event)
{
	/* We already know that wait time counts as busy time. */
	if (event == PICK_NEXT_TASK || event == TASK_MIGRATE)
		return div64_u64(delta * task_load(p), max_task_load());

	return delta;
}
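
A quick sanity check of the scaling in wait_adjust() (a userspace
sketch; MAX_TASK_LOAD and the is_wait flag are simplified stand-ins for
max_task_load() and the event check above):

#include <assert.h>
#include <stdint.h>

#define MAX_TASK_LOAD 10000000ULL	/* stand-in for max_task_load() */

static uint64_t wait_adjust_sketch(uint64_t delta, uint64_t load,
				   int is_wait)
{
	/* Wait segments are scaled by the task's load fraction... */
	if (is_wait)
		return delta * load / MAX_TASK_LOAD;
	/* ...while run segments are accounted in full. */
	return delta;
}

int main(void)
{
	/* A task at 60% load (6,000,000/10,000,000) waiting 10 ms
	 * contributes only 6 ms; a 10 ms run counts in full. */
	assert(wait_adjust_sketch(10000000, 6000000, 1) == 6000000);
	assert(wait_adjust_sketch(10000000, 6000000, 0) == 10000000);
	return 0;
}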
/*
* Account cpu demand of task and/or update task's cpu demand history
*
@@ -1773,7 +1786,8 @@ static void update_task_demand(struct task_struct *p, struct rq *rq,
	if (!new_window) {
		/* The simple case - busy time contained within the existing
		 * window. */
		add_to_task_demand(rq, p, wait_adjust(p,
				wallclock - mark_start, event));
		return;
	}
@@ -1784,13 +1798,14 @@ static void update_task_demand(struct task_struct *p, struct rq *rq,
	window_start -= (u64)nr_full_windows * (u64)window_size;

	/* Process (window_start - mark_start) first */
	add_to_task_demand(rq, p,
			wait_adjust(p, window_start - mark_start, event));

	/* Push new sample(s) into task's demand history */
	update_history(rq, p, p->ravg.sum, 1, event);
	if (nr_full_windows)
		update_history(rq, p, scale_exec_time(wait_adjust(p,
				window_size, event), rq), nr_full_windows, event);

	/* Roll window_start back to current to process any remainder
	 * in current window. */
@@ -1798,7 +1813,8 @@ static void update_task_demand(struct task_struct *p, struct rq *rq,
	/* Process (wallclock - window_start) next */
	mark_start = window_start;
	add_to_task_demand(rq, p,
			wait_adjust(p, wallclock - mark_start, event));
}
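
Taken together, the calls above decompose a multi-window interval
[mark_start, wallclock) into a tail, some full windows, and a head.
Below is a standalone sketch of that decomposition; account() is a
hypothetical stand-in for add_to_task_demand()/update_history(), and
the wait_adjust()/scale_exec_time() scaling is omitted:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for add_to_task_demand()/update_history(). */
static void account(const char *part, uint64_t ns, uint64_t n)
{
	printf("%s: %llu ns x %llu\n", part,
	       (unsigned long long)ns, (unsigned long long)n);
}

/* Decompose [mark_start, wallclock); window_start is the current window. */
static void split_interval(uint64_t mark_start, uint64_t wallclock,
			   uint64_t window_start, uint64_t window_size)
{
	uint64_t nr_full = (window_start - mark_start) / window_size;

	/* 1. Tail of the window in which mark_start falls. */
	window_start -= nr_full * window_size;
	account("tail", window_start - mark_start, 1);

	/* 2. Zero or more full windows in between. */
	if (nr_full)
		account("full", window_size, nr_full);

	/* 3. Head of the current window. */
	window_start += nr_full * window_size;
	account("head", wallclock - window_start, 1);
}

int main(void)
{
	/* 10 ms windows: a segment from t=3 ms to t=27 ms splits into
	 * a 7 ms tail, one full window, and a 7 ms head. */
	split_interval(3000000, 27000000, 20000000, 10000000);
	return 0;
}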
/* Reflect task activity on its demand and cpu's busy time statistics */

@@ -1373,7 +1373,7 @@ unsigned int __read_mostly sysctl_sched_init_task_load_pct = 15;
unsigned int __read_mostly sysctl_sched_min_runtime = 0; /* 0 ms */
u64 __read_mostly sched_min_runtime = 0; /* 0 ms */
unsigned int task_load(struct task_struct *p)
{
	if (sched_use_pelt)
		return p->se.avg.runnable_avg_sum_scaled;

@@ -748,6 +748,7 @@ extern unsigned int sched_heavy_task;
extern void reset_cpu_hmp_stats(int cpu, int reset_cra);
extern void fixup_nr_big_small_task(int cpu, int reset_stats);
u64 scale_load_to_cpu(u64 load, int cpu);
unsigned int task_load(struct task_struct *p);
unsigned int max_task_load(void);
extern void sched_account_irqtime(int cpu, struct task_struct *curr,
				u64 delta, u64 wallclock);