sched: Add cgroup-based criteria for upmigration

It may be desirable to discourage upmigration of tasks belonging to
some cgroups. Add a per-cgroup flag (upmigrate_discourage) that
discourages upmigration of a cgroup's tasks. Tasks of such a cgroup
are allowed to upmigrate only in an overcommitted scenario.

Change-Id: I1780e420af1b6865c5332fb55ee1ee408b74d8ce
Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org>
Author: Srivatsa Vaddagiri <vatsa@codeaurora.org>
Date:   2015-02-06 18:05:53 +05:30
Parent: 93a10a8b28
Commit: 995fad6d1a
4 changed files with 94 additions and 8 deletions
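For context, here is a minimal userspace sketch of how the new attribute might be
exercised once this change is applied. The mount point /sys/fs/cgroup/cpu and the
group name "bg" are illustrative assumptions only; they are not part of this commit.

/*
 * Example only: discourage upmigration for the calling task by placing it
 * in a cgroup whose cpu.upmigrate_discourage flag is set.  Paths assumed.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>

static void write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		exit(1);
	}
	fprintf(f, "%s", val);
	fclose(f);
}

int main(void)
{
	char pid[32];

	/* Create a child group under the cpu controller (path assumed). */
	mkdir("/sys/fs/cgroup/cpu/bg", 0755);

	/* Tasks of this group should no longer be classified as "big". */
	write_str("/sys/fs/cgroup/cpu/bg/cpu.upmigrate_discourage", "1");

	/* Move the current task into the group. */
	snprintf(pid, sizeof(pid), "%d", getpid());
	write_str("/sys/fs/cgroup/cpu/bg/tasks", pid);

	return 0;
}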

@@ -557,8 +557,13 @@ both tasks and CPUs to aid in the placement of tasks.
 /proc/sys/kernel/sched_upmigrate
-This value is a percentage. If a task consumes more than this much of
-a particular CPU, that CPU will be considered too small for the task.
+This value is a percentage. If a task consumes more than this much of a
+particular CPU, that CPU will be considered too small for the task. The task
+will thus be seen as a "big" task on that cpu and will be reflected in the
+nr_big_tasks statistic maintained for that cpu. Note that certain tasks (those
+whose nice value exceeds sched_upmigrate_min_nice or those belonging to a
+cgroup whose upmigrate_discourage flag is set) will never be classified as big
+tasks despite their high demand.

 - mostly_idle
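The classification rule above can be summarized as a small predicate. This is an
illustrative sketch only, not kernel code; the function name, parameters, and the
example values are made up for the purpose of the example.

/* Sketch of big-task classification as described above (not kernel code). */
static int classify_as_big(unsigned int load_pct,	/* task demand as % of this cpu */
			   int nice,
			   int cgroup_discouraged,	/* cpu.upmigrate_discourage set? */
			   unsigned int sched_upmigrate,	/* e.g. 80 (%) */
			   int sched_upmigrate_min_nice)
{
	/* Low-priority or discouraged tasks are never big, however busy. */
	if (nice > sched_upmigrate_min_nice || cgroup_discouraged)
		return 0;

	/* Otherwise a task above the threshold is big, e.g. 85% > 80%. */
	return load_pct > sched_upmigrate;
}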
@@ -1096,6 +1101,8 @@ A task whose nice value is greater than this tunable value will never
 be considered as a "big" task (it will not be allowed to run on a
 high-performance CPU).
+
+See also notes on 'cpu.upmigrate_discourage' tunable.

 *** 7.10 sched_enable_power_aware
 Appears at: /proc/sys/kernel/sched_enable_power_aware
@@ -1284,6 +1291,22 @@ account of energy awareness reasons.
 The same logic also applies to the load balancer path to avoid frequent
 migrations due to energy awareness.
+
+*** 7.25 cpu.upmigrate_discourage
+
+Default value : 0
+
+This is a cgroup attribute supported by the cpu resource controller. It
+normally appears at [root_cpu]/[name1]/../[name2]/cpu.upmigrate_discourage.
+Here "root_cpu" is the mount point for the cgroup (cpu resource control)
+filesystem, and name1, name2, etc. are names of cgroups that form a hierarchy.
+
+Setting this flag to 1 discourages upmigration for all tasks of the cgroup.
+High-demand tasks of such a cgroup will never be classified as big tasks and
+hence will not be upmigrated. Tasks of the cgroup are allowed to upmigrate
+only in an overcommitted scenario. See the notes on sched_spill_nr_run and
+sched_spill_load for how the overcommitment threshold is defined, and also
+the notes on the 'sched_upmigrate_min_nice' tunable.
+
 =========================
 8. HMP SCHEDULER TRACE POINTS
 =========================
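To make the "overcommitted scenario" exception concrete, the following is a
simplified model of the placement decision described above. It is not the kernel
implementation; the struct, helper names, and the tunable values shown are
assumptions for illustration.

/* Simplified model (not kernel code) of when a discouraged task may spill. */
struct little_cluster_state {
	unsigned int nr_running;	/* runnable tasks on a little cpu */
	unsigned int load_pct;		/* cumulative load, % of capacity */
};

/* Tunables from /proc/sys/kernel; the values here are only examples. */
static unsigned int sched_spill_nr_run = 10;
static unsigned int sched_spill_load = 100;

static int overcommitted(const struct little_cluster_state *s)
{
	return s->nr_running >= sched_spill_nr_run ||
	       s->load_pct > sched_spill_load;
}

/*
 * A task from an upmigrate_discourage cgroup is never counted as big, so
 * it is allowed onto a high-performance cpu only when the little cpus are
 * already overcommitted per the spill thresholds.
 */
static int may_place_on_big_cpu(int task_discouraged,
				const struct little_cluster_state *s)
{
	if (!task_discouraged)
		return 1;	/* usual big-task rules apply instead */
	return overcommitted(s);
}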

@@ -9808,6 +9808,45 @@ static int cpu_notify_on_migrate_write_u64(struct cgroup *cgrp,
 	return 0;
 }

+#ifdef CONFIG_SCHED_HMP
+
+static u64 cpu_upmigrate_discourage_read_u64(struct cgroup *cgrp,
+					     struct cftype *cft)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+
+	return tg->upmigrate_discouraged;
+}
+
+static int cpu_upmigrate_discourage_write_u64(struct cgroup *cgrp,
+				struct cftype *cft, u64 upmigrate_discourage)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+	int discourage = upmigrate_discourage > 0;
+
+	if (tg->upmigrate_discouraged == discourage)
+		return 0;
+
+	/*
+	 * Revisit big-task classification for tasks of this cgroup. It would
+	 * have been efficient to walk tasks of just this cgroup in running
+	 * state, but we don't have easy means to do that. Walk all tasks in
+	 * running state on all cpus instead and re-visit their big task
+	 * classification.
+	 */
+	get_online_cpus();
+	pre_big_small_task_count_change(cpu_online_mask);
+
+	tg->upmigrate_discouraged = discourage;
+
+	post_big_small_task_count_change(cpu_online_mask);
+	put_online_cpus();
+
+	return 0;
+}
+
+#endif	/* CONFIG_SCHED_HMP */
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype,
 				u64 shareval)
@@ -10091,6 +10130,13 @@ static struct cftype cpu_files[] = {
 		.read_u64 = cpu_notify_on_migrate_read_u64,
 		.write_u64 = cpu_notify_on_migrate_write_u64,
 	},
+#ifdef CONFIG_SCHED_HMP
+	{
+		.name = "upmigrate_discourage",
+		.read_u64 = cpu_upmigrate_discourage_read_u64,
+		.write_u64 = cpu_upmigrate_discourage_write_u64,
+	},
+#endif
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	{
 		.name = "shares",

@@ -1499,14 +1499,29 @@ u64 scale_load_to_cpu(u64 task_load, int cpu)
 	return task_load;
 }

+#ifdef CONFIG_CGROUP_SCHED
+
+static inline int upmigrate_discouraged(struct task_struct *p)
+{
+	return task_group(p)->upmigrate_discouraged;
+}
+
+#else
+
+static inline int upmigrate_discouraged(struct task_struct *p)
+{
+	return 0;
+}
+
+#endif
+
 /* Is a task "big" on its current cpu */
 static inline int is_big_task(struct task_struct *p)
 {
 	u64 load = task_load(p);
 	int nice = TASK_NICE(p);

-	/* Todo: Provide cgroup-based control as well? */
-	if (nice > sched_upmigrate_min_nice)
+	if (nice > sched_upmigrate_min_nice || upmigrate_discouraged(p))
 		return 0;

 	load = scale_load_to_cpu(load, task_cpu(p));
@@ -1693,8 +1708,7 @@ static int task_will_fit(struct task_struct *p, int cpu)
 		if (rq->capacity > prev_rq->capacity)
 			return 1;
 	} else {
-		/* Todo: Provide cgroup-based control as well? */
-		if (nice > sched_upmigrate_min_nice)
+		if (nice > sched_upmigrate_min_nice || upmigrate_discouraged(p))
 			return 1;

 		load = scale_load_to_cpu(task_load(p), cpu);
@@ -2642,8 +2656,8 @@ static inline int migration_needed(struct rq *rq, struct task_struct *p)
 	if (is_small_task(p))
 		return 0;

-	/* Todo: cgroup-based control? */
-	if (nice > sched_upmigrate_min_nice && rq->capacity > min_capacity)
+	if ((nice > sched_upmigrate_min_nice || upmigrate_discouraged(p)) &&
+			rq->capacity > min_capacity)
 		return MOVE_TO_LITTLE_CPU;

 	if (!task_will_fit(p, cpu_of(rq)))

@@ -137,6 +137,9 @@ struct task_group {
 	struct cgroup_subsys_state css;
 	bool notify_on_migrate;
+#ifdef CONFIG_SCHED_HMP
+	bool upmigrate_discouraged;
+#endif

 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* schedulable entities of this group on each cpu */