cpuidle: lpm-levels: Consider cluster history for LPM selection

Consider the recent history (residencies) of the cluster and core
low power modes when selecting the cluster-level low power mode
to enter.

Change-Id: Ifdc847a71136f55aded8a758b92179bb9aebfdcb
Signed-off-by: Srinivas Rao L <lsrao@codeaurora.org>
Srinivas Rao L 2016-06-07 11:13:04 +05:30
parent e4c619e932
commit 28caff276f
3 changed files with 381 additions and 16 deletions
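The change below keeps, per cluster, the last MAXSAMPLES low power mode residencies (struct cluster_history) and feeds them, together with the per-CPU prediction, into cluster level selection. The standalone C sketch that follows illustrates the averaging rule cluster_predict() applies when most recent samples failed to meet a level's min_residency; the names cl_hist_sketch and predict_short_sleep and the 5000 us threshold used in main() are illustrative assumptions, not part of the driver.

#include <stdint.h>
#include <stdio.h>

#define MAXSAMPLES 5

struct cl_hist_sketch {
	uint32_t resi[MAXSAMPLES];   /* recent residencies, in usec */
	int mode[MAXSAMPLES];        /* cluster level entered for each sample */
	int nsamp;                   /* number of valid samples collected */
};

/*
 * If most of the recent samples for level 'lvl' fell short of that level's
 * min_residency, predict the average of those short residencies so the
 * caller can pick a shallower level instead.  Returns 1 and fills *pred_us
 * on a prediction, 0 otherwise.
 */
static int predict_short_sleep(const struct cl_hist_sketch *h, int lvl,
			       uint32_t min_residency, uint32_t *pred_us)
{
	uint64_t total = 0;
	int i, failed = 0;

	if (h->nsamp < MAXSAMPLES)
		return 0;	/* not enough history yet */

	for (i = 0; i < MAXSAMPLES; i++) {
		if (h->mode[i] == lvl && h->resi[i] < min_residency) {
			failed++;
			total += h->resi[i];
		}
	}

	if (failed > MAXSAMPLES - 2) {	/* most samples woke up early */
		*pred_us = (uint32_t)(total / failed);
		return 1;
	}
	return 0;
}

int main(void)
{
	/* Five recent entries into level 1, all waking well before a
	 * hypothetical 5000 us min_residency. */
	struct cl_hist_sketch h = {
		.resi = { 1200, 900, 1500, 1100, 1000 },
		.mode = { 1, 1, 1, 1, 1 },
		.nsamp = MAXSAMPLES,
	};
	uint32_t pred_us;

	if (predict_short_sleep(&h, 1, 5000, &pred_us))
		printf("predicted residency: %u us -> pick a shallower level\n",
		       pred_us);
	return 0;
}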

@@ -82,8 +82,6 @@ struct lpm_debug {
struct lpm_cluster *lpm_root_node;
#define MAXSAMPLES 5
static bool lpm_prediction;
module_param_named(lpm_prediction,
lpm_prediction, bool, S_IRUGO | S_IWUSR | S_IWGRP);
@@ -105,6 +103,7 @@ struct lpm_history {
uint32_t hptr;
bool hinvalid;
bool htmr_wkup;
int64_t stime;
};
static DEFINE_PER_CPU(struct lpm_history, hist);
@@ -237,9 +236,6 @@ static enum hrtimer_restart lpm_hrtimer_cb(struct hrtimer *h)
static void histtimer_cancel(void)
{
if (!lpm_prediction)
return;
hrtimer_try_to_cancel(&histtimer);
}
@@ -261,6 +257,51 @@ static void histtimer_start(uint32_t time_us)
hrtimer_start(&histtimer, hist_ktime, HRTIMER_MODE_REL_PINNED);
}
static void cluster_timer_init(struct lpm_cluster *cluster)
{
struct list_head *list;
if (!cluster)
return;
hrtimer_init(&cluster->histtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
list_for_each(list, &cluster->child) {
struct lpm_cluster *n;
n = list_entry(list, typeof(*n), list);
cluster_timer_init(n);
}
}
static void clusttimer_cancel(void)
{
int cpu = raw_smp_processor_id();
struct lpm_cluster *cluster = per_cpu(cpu_cluster, cpu);
hrtimer_try_to_cancel(&cluster->histtimer);
hrtimer_try_to_cancel(&cluster->parent->histtimer);
}
static enum hrtimer_restart clusttimer_fn(struct hrtimer *h)
{
struct lpm_cluster *cluster = container_of(h,
struct lpm_cluster, histtimer);
cluster->history.hinvalid = 1;
return HRTIMER_NORESTART;
}
static void clusttimer_start(struct lpm_cluster *cluster, uint32_t time_us)
{
uint64_t time_ns = time_us * NSEC_PER_USEC;
ktime_t clust_ktime = ns_to_ktime(time_ns);
cluster->histtimer.function = clusttimer_fn;
hrtimer_start(&cluster->histtimer, clust_ktime,
HRTIMER_MODE_REL_PINNED);
}
static void msm_pm_set_timer(uint32_t modified_time_us)
{
u64 modified_time_ns = modified_time_us * NSEC_PER_USEC;
@@ -370,11 +411,14 @@ static uint64_t lpm_cpuidle_predict(struct cpuidle_device *dev,
if (history->hinvalid) {
history->hinvalid = 0;
history->htmr_wkup = 1;
history->stime = 0;
return 0;
}
if (history->nsamp < MAXSAMPLES)
if (history->nsamp < MAXSAMPLES) {
history->stime = 0;
return 0;
}
again:
max = avg = divisor = stddev = 0;
@@ -404,6 +448,7 @@ again:
if (((avg > stddev * 6) && (divisor >= (MAXSAMPLES - 1)))
|| stddev <= ref_stddev) {
history->stime = ktime_to_us(ktime_get()) + avg;
return avg;
} else if (divisor > (MAXSAMPLES - 1)) {
thresh = max - 1;
@@ -426,6 +471,8 @@ again:
*idx_restrict = j;
do_div(total, failed);
*idx_restrict_time = total;
history->stime = ktime_to_us(ktime_get())
+ *idx_restrict_time;
break;
}
}
@@ -443,6 +490,7 @@ static inline void invalidate_predict_history(struct cpuidle_device *dev)
if (history->hinvalid) {
history->hinvalid = 0;
history->htmr_wkup = 1;
history->stime = 0;
}
}
@@ -462,6 +510,7 @@ static void clear_predict_history(void)
history->mode[i] = -1;
history->hptr = 0;
history->nsamp = 0;
history->stime = 0;
}
}
}
@@ -578,13 +627,15 @@ static int cpu_power_select(struct cpuidle_device *dev,
}
static uint64_t get_cluster_sleep_time(struct lpm_cluster *cluster,
struct cpumask *mask, bool from_idle)
struct cpumask *mask, bool from_idle, uint32_t *pred_time)
{
int cpu;
int next_cpu = raw_smp_processor_id();
ktime_t next_event;
struct tick_device *td;
struct cpumask online_cpus_in_cluster;
struct lpm_history *history;
int64_t prediction = LONG_MAX;
next_event.tv64 = KTIME_MAX;
if (!suspend_wake_time)
@@ -607,11 +658,21 @@ static uint64_t get_cluster_sleep_time(struct lpm_cluster *cluster,
next_event.tv64 = td->evtdev->next_event.tv64;
next_cpu = cpu;
}
if (from_idle && pred_time && lpm_prediction) {
history = &per_cpu(hist, cpu);
if (history->stime && (history->stime < prediction))
prediction = history->stime;
}
}
if (mask)
cpumask_copy(mask, cpumask_of(next_cpu));
if (from_idle && pred_time && lpm_prediction) {
if (prediction > ktime_to_us(ktime_get()))
*pred_time = prediction - ktime_to_us(ktime_get());
}
if (ktime_to_us(next_event) > ktime_to_us(ktime_get()))
return ktime_to_us(ktime_sub(next_event, ktime_get()));
@@ -619,23 +680,196 @@ static uint64_t get_cluster_sleep_time(struct lpm_cluster *cluster,
return 0;
}
static int cluster_select(struct lpm_cluster *cluster, bool from_idle)
static int cluster_predict(struct lpm_cluster *cluster,
uint32_t *pred_us)
{
int i, j;
int ret = 0;
struct cluster_history *history = &cluster->history;
int64_t cur_time = ktime_to_us(ktime_get());
if (history->hinvalid) {
history->hinvalid = 0;
history->htmr_wkup = 1;
history->flag = 0;
return ret;
}
if (history->nsamp == MAXSAMPLES) {
for (i = 0; i < MAXSAMPLES; i++) {
if ((cur_time - history->stime[i])
> CLUST_SMPL_INVLD_TIME)
history->nsamp--;
}
}
if (history->nsamp < MAXSAMPLES) {
history->flag = 0;
return ret;
}
if (history->flag == 2)
history->flag = 0;
if (history->htmr_wkup != 1) {
uint64_t total = 0;
if (history->flag == 1) {
for (i = 0; i < MAXSAMPLES; i++)
total += history->resi[i];
do_div(total, MAXSAMPLES);
*pred_us = total;
return 2;
}
for (j = 1; j < cluster->nlevels; j++) {
uint32_t failed = 0;
total = 0;
for (i = 0; i < MAXSAMPLES; i++) {
if ((history->mode[i] == j) && (history->resi[i]
< cluster->levels[j].pwr.min_residency)) {
failed++;
total += history->resi[i];
}
}
if (failed > (MAXSAMPLES-2)) {
do_div(total, failed);
*pred_us = total;
history->flag = 1;
return 1;
}
}
}
return ret;
}
static void update_cluster_history_time(struct cluster_history *history,
int idx, uint64_t start)
{
history->entry_idx = idx;
history->entry_time = start;
}
static void update_cluster_history(struct cluster_history *history, int idx)
{
uint32_t tmr = 0;
uint32_t residency = 0;
struct lpm_cluster *cluster =
container_of(history, struct lpm_cluster, history);
if (!lpm_prediction)
return;
if ((history->entry_idx == -1) || (history->entry_idx == idx)) {
residency = ktime_to_us(ktime_get()) - history->entry_time;
history->stime[history->hptr] = history->entry_time;
} else
return;
if (history->htmr_wkup) {
if (!history->hptr)
history->hptr = MAXSAMPLES-1;
else
history->hptr--;
history->resi[history->hptr] += residency;
history->htmr_wkup = 0;
tmr = 1;
} else {
history->resi[history->hptr] = residency;
}
history->mode[history->hptr] = idx;
history->entry_idx = INT_MIN;
history->entry_time = 0;
if (history->nsamp < MAXSAMPLES)
history->nsamp++;
trace_cluster_pred_hist(cluster->cluster_name,
history->mode[history->hptr], history->resi[history->hptr],
history->hptr, tmr);
(history->hptr)++;
if (history->hptr >= MAXSAMPLES)
history->hptr = 0;
}
static void clear_cl_history_each(struct cluster_history *history)
{
int i;
for (i = 0; i < MAXSAMPLES; i++) {
history->resi[i] = 0;
history->mode[i] = -1;
history->stime[i] = 0;
}
history->hptr = 0;
history->nsamp = 0;
history->flag = 0;
history->hinvalid = 0;
history->htmr_wkup = 0;
}
static void clear_cl_predict_history(void)
{
struct lpm_cluster *cluster = lpm_root_node;
struct list_head *list;
if (!lpm_prediction)
return;
clear_cl_history_each(&cluster->history);
list_for_each(list, &cluster->child) {
struct lpm_cluster *n;
n = list_entry(list, typeof(*n), list);
clear_cl_history_each(&n->history);
}
}
static int cluster_select(struct lpm_cluster *cluster, bool from_idle,
int *ispred)
{
int best_level = -1;
int i;
struct cpumask mask;
uint32_t latency_us = ~0U;
uint32_t sleep_us;
uint32_t cpupred_us = 0, pred_us = 0;
int pred_mode = 0, predicted = 0;
if (!cluster)
return -EINVAL;
sleep_us = (uint32_t)get_cluster_sleep_time(cluster, NULL, from_idle);
sleep_us = (uint32_t)get_cluster_sleep_time(cluster, NULL,
from_idle, &cpupred_us);
if (smp_processor_id() < 4)
cl0_sleep_us = sleep_us;
else
cl1_sleep_us = sleep_us;
if (from_idle && lpm_prediction) {
pred_mode = cluster_predict(cluster, &pred_us);
if (cpupred_us && pred_mode && (cpupred_us < pred_us))
pred_us = cpupred_us;
if (pred_us && pred_mode && (pred_us < sleep_us))
predicted = 1;
if (predicted && (pred_us == cpupred_us))
predicted = 2;
}
if (cpumask_and(&mask, cpu_online_mask, &cluster->child_cpus))
latency_us = pm_qos_request_for_cpumask(PM_QOS_CPU_DMA_LATENCY,
&mask);
@@ -680,11 +914,20 @@ static int cluster_select(struct lpm_cluster *cluster, bool from_idle)
* if none of the previous levels are enabled,
* min_residency is time overhead for current level
*/
if (sleep_us >= pwr_params->min_residency) {
if (predicted ? (pred_us >= pwr_params->min_residency)
: (sleep_us >= pwr_params->min_residency)) {
best_level = i;
}
}
if ((best_level == (cluster->nlevels - 1)) && (pred_mode == 2))
cluster->history.flag = 2;
*ispred = predicted;
trace_cluster_pred_select(cluster->cluster_name, best_level, sleep_us,
latency_us, predicted, pred_us);
return best_level;
}
@@ -698,12 +941,12 @@ static void cluster_notify(struct lpm_cluster *cluster,
}
static int cluster_configure(struct lpm_cluster *cluster, int idx,
bool from_idle)
bool from_idle, int predicted)
{
struct lpm_cluster_level *level = &cluster->levels[idx];
int ret, i;
uint32_t sleep_us;
unsigned int cpu = raw_smp_processor_id();
spin_lock(&cluster->sync_lock);
@@ -726,6 +969,10 @@ static int cluster_configure(struct lpm_cluster *cluster, int idx,
cluster->num_children_in_sync.bits[0],
cluster->child_cpus.bits[0], from_idle);
lpm_stats_cluster_enter(cluster->stats, idx);
if (from_idle && lpm_prediction)
update_cluster_history_time(&cluster->history, idx,
ktime_to_us(ktime_get()));
}
for (i = 0; i < cluster->ndevices; i++) {
@@ -738,7 +985,8 @@ static int cluster_configure(struct lpm_cluster *cluster, int idx,
struct cpumask nextcpu, *cpumask;
uint32_t us;
us = get_cluster_sleep_time(cluster, &nextcpu, from_idle);
us = get_cluster_sleep_time(cluster, &nextcpu,
from_idle, NULL);
cpumask = level->disable_dynamic_routing ? NULL : &nextcpu;
ret = msm_rpm_enter_sleep(0, cpumask);
@@ -748,6 +996,7 @@ static int cluster_configure(struct lpm_cluster *cluster, int idx,
}
clear_predict_history();
clear_cl_predict_history();
do_div(us, USEC_PER_SEC/SCLK_HZ);
msm_mpm_enter_sleep((uint32_t)us, from_idle, cpumask);
@@ -757,6 +1006,15 @@ static int cluster_configure(struct lpm_cluster *cluster, int idx,
cluster_notify(cluster, level, true);
cluster->last_level = idx;
if (predicted && (idx < (cluster->nlevels - 1))) {
struct power_params *pwr_params = &cluster->levels[idx].pwr;
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
clusttimer_start(cluster, pwr_params->max_residency + tmr_add);
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
}
spin_unlock(&cluster->sync_lock);
return 0;
@@ -776,6 +1034,8 @@ static void cluster_prepare(struct lpm_cluster *cluster,
const struct cpumask *cpu, int child_idx, bool from_idle)
{
int i;
int predicted = 0;
unsigned int ncpu = raw_smp_processor_id();
if (!cluster)
return;
@@ -809,12 +1069,30 @@ static void cluster_prepare(struct lpm_cluster *cluster,
}
spin_unlock(&cluster->sync_lock);
i = cluster_select(cluster, from_idle);
i = cluster_select(cluster, from_idle, &predicted);
if (((i < 0) || (i == cluster->default_level))
&& predicted && from_idle) {
update_cluster_history_time(&cluster->history,
-1, ktime_to_us(ktime_get()));
if (i < 0) {
struct power_params *pwr_params =
&cluster->levels[0].pwr;
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT,
&ncpu);
clusttimer_start(cluster,
pwr_params->max_residency + tmr_add);
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER,
&ncpu);
}
}
if (i < 0)
return;
if (cluster_configure(cluster, i, from_idle))
if (cluster_configure(cluster, i, from_idle, predicted))
return;
cluster_prepare(cluster->parent, &cluster->num_children_in_sync, i,
@@ -849,6 +1127,10 @@ static void cluster_unprepare(struct lpm_cluster *cluster,
&lvl->num_cpu_votes, cpu);
}
if (from_idle && first_cpu &&
(cluster->last_level == cluster->default_level))
update_cluster_history(&cluster->history, cluster->last_level);
if (!first_cpu || cluster->last_level == cluster->default_level)
goto unlock_return;
@@ -882,6 +1164,10 @@ static void cluster_unprepare(struct lpm_cluster *cluster,
}
cluster_notify(cluster, &cluster->levels[last_level], false);
if (from_idle)
update_cluster_history(&cluster->history, last_level);
unlock_return:
spin_unlock(&cluster->sync_lock);
cluster_unprepare(cluster->parent, &cluster->child_cpus,
@@ -1095,7 +1381,10 @@ static int lpm_cpuidle_enter(struct cpuidle_device *dev,
exit:
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
local_irq_enable();
histtimer_cancel();
if (lpm_prediction) {
histtimer_cancel();
clusttimer_cancel();
}
return idx;
}
@@ -1355,6 +1644,7 @@ static int lpm_probe(struct platform_device *pdev)
suspend_set_ops(&lpm_suspend_ops);
hrtimer_init(&lpm_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer_init(&histtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cluster_timer_init(lpm_root_node);
ret = remote_spin_lock_init(&scm_handoff_lock, SCM_HANDOFF_LOCK_ID);
if (ret) {

@@ -14,6 +14,8 @@
#include <soc/qcom/spm.h>
#define NR_LPM_LEVELS 8
#define MAXSAMPLES 5
#define CLUST_SMPL_INVLD_TIME 40000
extern bool use_psci;
@@ -81,6 +83,19 @@ struct low_power_ops {
enum msm_pm_l2_scm_flag tz_flag;
};
struct cluster_history {
uint32_t resi[MAXSAMPLES];
int mode[MAXSAMPLES];
int64_t stime[MAXSAMPLES];
uint32_t hptr;
bool hinvalid;
bool htmr_wkup;
uint64_t entry_time;
int entry_idx;
int nsamp;
int flag;
};
struct lpm_cluster {
struct list_head list;
struct list_head child;
@@ -105,6 +120,8 @@ struct lpm_cluster {
unsigned int psci_mode_shift;
unsigned int psci_mode_mask;
bool no_saw_devices;
struct cluster_history history;
struct hrtimer histtimer;
};
int set_l2_mode(struct low_power_ops *ops, int mode, bool notify_rpm);

@@ -192,6 +192,64 @@ TRACE_EVENT(cluster_exit,
__entry->from_idle)
);
TRACE_EVENT(cluster_pred_select,
TP_PROTO(const char *name, int index, u32 sleep_us,
u32 latency, int pred, u32 pred_us),
TP_ARGS(name, index, sleep_us, latency, pred, pred_us),
TP_STRUCT__entry(
__field(const char *, name)
__field(int, index)
__field(u32, sleep_us)
__field(u32, latency)
__field(int, pred)
__field(u32, pred_us)
),
TP_fast_assign(
__entry->name = name;
__entry->index = index;
__entry->sleep_us = sleep_us;
__entry->latency = latency;
__entry->pred = pred;
__entry->pred_us = pred_us;
),
TP_printk("name:%s idx:%d sleep_time:%u latency:%u pred:%d pred_us:%u",
__entry->name, __entry->index, __entry->sleep_us,
__entry->latency, __entry->pred, __entry->pred_us)
);
TRACE_EVENT(cluster_pred_hist,
TP_PROTO(const char *name, int idx, u32 resi,
u32 sample, u32 tmr),
TP_ARGS(name, idx, resi, sample, tmr),
TP_STRUCT__entry(
__field(const char *, name)
__field(int, idx)
__field(u32, resi)
__field(u32, sample)
__field(u32, tmr)
),
TP_fast_assign(
__entry->name = name;
__entry->idx = idx;
__entry->resi = resi;
__entry->sample = sample;
__entry->tmr = tmr;
),
TP_printk("name:%s idx:%d resi:%u sample:%u tmr:%u",
__entry->name, __entry->idx, __entry->resi,
__entry->sample, __entry->tmr)
);
TRACE_EVENT(pre_pc_cb,
TP_PROTO(int tzflag),