sched: look for least busy and fallback CPUs only when needed

Function best_small_task_cpu() is biased towards mostly idle CPUs and
CPUs in shallow C-states.  Thus the chance of needing the least busy CPU
or the least power cost fallback CPU is typically quite rare.  At present,
however, the function always searches for those two CPUs, which is
unnecessary most of the time.

Optimize the function by looking for the least busy CPU and the least
power cost fallback CPU only when they are actually needed.  This change
is solely an optimization and makes no functional difference.
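
For illustration only, the reordered search amounts to the toy sketch
below; the struct, field names, helpers and NR_TOY_CPUS are made-up
stand-ins for this example, not the kernel's actual data structures:

#include <limits.h>

#define NR_TOY_CPUS 8

struct toy_cpu {
	int online;          /* CPU may be used at all                 */
	int fallback_only;   /* e.g. big-cluster CPU, last resort only */
	int mostly_idle;     /* load below the mostly-idle threshold   */
	int cstate;          /* 0 = busy, >0 = idle C-state depth      */
	unsigned long load;  /* current load                           */
	unsigned long cost;  /* power cost of placing the task here    */
};

static int toy_best_small_task_cpu(const struct toy_cpu c[NR_TOY_CPUS])
{
	int i, min_cstate = INT_MAX, min_cstate_cpu = -1;
	int best_busy_cpu = -1, fallback_cpu = -1;
	unsigned long min_load = ULONG_MAX, min_cost = ULONG_MAX;

	/* Pass 1 (common case): mostly idle CPU or shallowest idle C-state. */
	for (i = 0; i < NR_TOY_CPUS; i++) {
		if (!c[i].online || c[i].fallback_only)
			continue;
		if (c[i].mostly_idle)
			return i;
		if (c[i].cstate && c[i].cstate < min_cstate) {
			min_cstate = c[i].cstate;
			min_cstate_cpu = i;
		}
	}
	if (min_cstate_cpu != -1)
		return min_cstate_cpu;

	/* Pass 2 (rare): only now compute the least busy CPU. */
	for (i = 0; i < NR_TOY_CPUS; i++) {
		if (!c[i].online || c[i].fallback_only)
			continue;
		if (c[i].load < min_load) {
			min_load = c[i].load;
			best_busy_cpu = i;
		}
	}
	if (best_busy_cpu != -1)
		return best_busy_cpu;

	/* Pass 3 (last resort): least power cost among the fallback CPUs. */
	for (i = 0; i < NR_TOY_CPUS; i++) {
		if (!c[i].online || !c[i].fallback_only)
			continue;
		if (c[i].cost < min_cost) {
			min_cost = c[i].cost;
			fallback_cpu = i;
		}
	}
	return fallback_cpu;
}

Previously the least busy CPU and the power cost fallback CPU were tracked
during the single pass even when a mostly idle or shallow C-state CPU ended
up being chosen; with this change those two searches run only when the
first pass fails to place the task.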

CRs-fixed: 849655
Change-Id: I5eca11436e85b448142a7a7644f422c71eb25e8e
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
commit a7f3ff4330 (parent 59e32e9c46)
Author: Joonwoo Park <joonwoop@codeaurora.org>
Date:   2015-06-01 21:07:33 -07:00

1 changed file with 39 additions and 22 deletions

@@ -1964,11 +1964,12 @@ static int best_small_task_cpu(struct task_struct *p, int sync)
 	int min_cstate_cpu = -1;
 	int min_cstate = INT_MAX;
 	int cpu_cost, min_cost = INT_MAX;
-	int i = task_cpu(p), cstate, prev_cpu;
+	int i = task_cpu(p), prev_cpu;
 	int hmp_capable;
 	u64 tload, cpu_load, min_load = ULLONG_MAX;
 	cpumask_t temp;
 	cpumask_t search_cpu;
+	cpumask_t fb_search_cpu = CPU_MASK_NONE;
 	struct rq *rq;
 
 	cpumask_and(&temp, &mpc_mask, cpu_possible_mask);
@@ -1980,7 +1981,6 @@ static int best_small_task_cpu(struct task_struct *p, int sync)
 	do {
 		rq = cpu_rq(i);
-		prev_cpu = (i == task_cpu(p));
 		cpumask_clear_cpu(i, &search_cpu);
@@ -1993,28 +1993,21 @@ static int best_small_task_cpu(struct task_struct *p, int sync)
 					cpu_temp(i));
 
 		if (rq->max_possible_capacity == max_possible_capacity &&
-							hmp_capable) {
-			tload = scale_load_to_cpu(task_load(p), i);
-			cpu_cost = power_cost(tload, i);
-			if (cpu_cost < min_cost ||
-			    (prev_cpu && cpu_cost == min_cost)) {
-				fallback_cpu = i;
-				min_cost = cpu_cost;
-			}
+							hmp_capable) {
+			cpumask_and(&fb_search_cpu, &search_cpu,
+				    &rq->freq_domain_cpumask);
+			cpumask_andnot(&search_cpu, &search_cpu,
+				       &rq->freq_domain_cpumask);
 			continue;
 		}
 
 		if (sched_cpu_high_irqload(i))
 			continue;
 
-		/* Todo this can be optimized to avoid checking c-state
-		 * and moving cstate assignment statement inside the if */
-		cstate = rq->cstate;
-		if (idle_cpu(i) && cstate) {
-			if (cstate < min_cstate ||
-			    (prev_cpu && cstate == min_cstate)) {
+		if (idle_cpu(i) && rq->cstate) {
+			if (rq->cstate < min_cstate) {
 				min_cstate_cpu = i;
-				min_cstate = cstate;
+				min_cstate = rq->cstate;
 			}
 			continue;
 		}
@@ -2022,8 +2015,22 @@ static int best_small_task_cpu(struct task_struct *p, int sync)
 		cpu_load = cpu_load_sync(i, sync);
 		if (mostly_idle_cpu_sync(i, cpu_load, sync))
 			return i;
+	} while ((i = cpumask_first(&search_cpu)) < nr_cpu_ids);
 
-		tload = scale_load_to_cpu(task_load(p), i);
+	if (min_cstate_cpu != -1)
+		return min_cstate_cpu;
+
+	cpumask_and(&search_cpu, tsk_cpus_allowed(p), cpu_online_mask);
+	cpumask_andnot(&search_cpu, &search_cpu, &fb_search_cpu);
+	for_each_cpu(i, &search_cpu) {
+		rq = cpu_rq(i);
+		prev_cpu = (i == task_cpu(p));
+
+		if (sched_cpu_high_irqload(i))
+			continue;
+
+		tload = scale_load_to_cpu(task_load(p), i);
+		cpu_load = cpu_load_sync(i, sync);
 		if (!spill_threshold_crossed(tload, cpu_load, rq)) {
 			if (cpu_load < min_load ||
 			    (prev_cpu && cpu_load == min_load)) {
@@ -2031,14 +2038,24 @@ static int best_small_task_cpu(struct task_struct *p, int sync)
 				best_busy_cpu = i;
 			}
 		}
-	} while ((i = cpumask_first(&search_cpu)) < nr_cpu_ids);
-
-	if (min_cstate_cpu != -1)
-		return min_cstate_cpu;
+	}
 
 	if (best_busy_cpu != -1)
 		return best_busy_cpu;
 
+	for_each_cpu(i, &fb_search_cpu) {
+		rq = cpu_rq(i);
+		prev_cpu = (i == task_cpu(p));
+		tload = scale_load_to_cpu(task_load(p), i);
+		cpu_cost = power_cost(tload, i);
+		if (cpu_cost < min_cost ||
+		    (prev_cpu && cpu_cost == min_cost)) {
+			fallback_cpu = i;
+			min_cost = cpu_cost;
+		}
+	}
+
 	return fallback_cpu;
 }