diff --git a/include/linux/sched.h b/include/linux/sched.h
index abd6f7662e30..d1f74e75928a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2065,6 +2065,7 @@ extern u64 cpu_clock(int cpu);
 extern u64 local_clock(void);
 extern u64 sched_clock_cpu(int cpu);

+extern u64 sched_ktime_clock(void);
 extern void sched_clock_init(void);
 extern int sched_clock_initialized(void);

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 5e917a25b1a3..741d61ea72b6 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -73,6 +73,7 @@ struct tick_sched {
 extern void __init tick_init(void);
 extern int tick_is_oneshot_available(void);
 extern u64 jiffy_to_sched_clock(u64 *now, u64 *jiffy_sched_clock);
+extern u64 jiffy_to_ktime_ns(u64 *now, u64 *jiffy_ktime_ns);
 extern struct tick_device *tick_get_device(int cpu);

 # ifdef CONFIG_HIGH_RES_TIMERS
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index aeb41cf4636b..98576e76feb8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -74,6 +74,7 @@
 #include
 #include
 #include
+#include <linux/syscore_ops.h>
 #include
 #include

@@ -842,6 +843,47 @@ void resched_task(struct task_struct *p)
 }
 #endif /* CONFIG_SMP */

+#ifdef CONFIG_SCHED_HMP
+static ktime_t ktime_last;
+static bool sched_ktime_suspended;
+
+u64 sched_ktime_clock(void)
+{
+        if (unlikely(sched_ktime_suspended))
+                return ktime_to_ns(ktime_last);
+        return ktime_to_ns(ktime_get());
+}
+
+static void sched_resume(void)
+{
+        sched_ktime_suspended = false;
+}
+
+static int sched_suspend(void)
+{
+        ktime_last = ktime_get();
+        sched_ktime_suspended = true;
+        return 0;
+}
+
+static struct syscore_ops sched_syscore_ops = {
+        .resume = sched_resume,
+        .suspend = sched_suspend
+};
+
+static int __init sched_init_ops(void)
+{
+        register_syscore_ops(&sched_syscore_ops);
+        return 0;
+}
+late_initcall(sched_init_ops);
+#else
+u64 sched_ktime_clock(void)
+{
+        return 0;
+}
+#endif /* CONFIG_SCHED_HMP */
+
 #if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
         (defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH)))
 /*
@@ -1293,7 +1335,7 @@ static void _set_preferred_cluster(struct related_thread_group *grp)
         * at same time. Avoid overhead in such cases of rechecking preferred
         * cluster
         */
-       if (sched_clock() - grp->last_update <
+       if (sched_ktime_clock() - grp->last_update <
                        sched_grp_min_cluster_update_delta)
                return;

@@ -1303,7 +1345,7 @@ static void _set_preferred_cluster(struct related_thread_group *grp)
        }

        grp->preferred_cluster = best_cluster(grp, combined_demand);
-       grp->last_update = sched_clock();
+       grp->last_update = sched_ktime_clock();
        trace_sched_set_preferred_cluster(grp, combined_demand);
 }

@@ -2304,16 +2346,20 @@ void sched_account_irqtime(int cpu, struct task_struct *curr,
 {
        struct rq *rq = cpu_rq(cpu);
        unsigned long flags, nr_windows;
-       u64 cur_jiffies_ts, now;
+       u64 cur_jiffies_ts;

        raw_spin_lock_irqsave(&rq->lock, flags);
-       now = sched_clock();
-       delta += (now - wallclock);
+       /*
+        * cputime (wallclock) uses sched_clock so use the same here for
+        * consistency.
+        */
+       delta += sched_clock() - wallclock;
        cur_jiffies_ts = get_jiffies_64();

        if (is_idle_task(curr))
-               update_task_ravg(curr, rq, IRQ_UPDATE, now, delta);
+               update_task_ravg(curr, rq, IRQ_UPDATE, sched_ktime_clock(),
+                                delta);

        nr_windows = cur_jiffies_ts - rq->irqload_ts;

@@ -2350,13 +2396,14 @@ static void reset_task_stats(struct task_struct *p)
 static inline void mark_task_starting(struct task_struct *p)
 {
        struct rq *rq = task_rq(p);
-       u64 wallclock = sched_clock();
+       u64 wallclock;

        if (!rq->window_start || sched_disable_window_stats) {
                reset_task_stats(p);
                return;
        }

+       wallclock = sched_ktime_clock();
        p->ravg.mark_start = wallclock;
 }

@@ -2365,12 +2412,11 @@ static inline void set_window_start(struct rq *rq)
        int cpu = cpu_of(rq);
        struct rq *sync_rq = cpu_rq(sync_cpu);

-       if (rq->window_start || !sched_enable_hmp ||
-           !sched_clock_initialized() || !sched_clock_cpu(cpu))
+       if (rq->window_start || !sched_enable_hmp)
                return;

        if (cpu == sync_cpu) {
-               rq->window_start = sched_clock();
+               rq->window_start = sched_ktime_clock();
        } else {
                raw_spin_unlock(&rq->lock);
                double_rq_lock(rq, sync_rq);
@@ -2424,7 +2470,7 @@ void sched_exit(struct task_struct *p)

        raw_spin_lock_irqsave(&rq->lock, flags);
        /* rq->curr == p */
-       wallclock = sched_clock();
+       wallclock = sched_ktime_clock();
        update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
        dequeue_task(rq, p, 0);
        reset_task_stats(p);
@@ -2482,7 +2528,7 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
 {
        int cpu;
        unsigned long flags;
-       u64 start_ts = sched_clock();
+       u64 start_ts = sched_ktime_clock();
        int reason = WINDOW_CHANGE;
        unsigned int old = 0, new = 0;
        unsigned int old_window_size = sched_ravg_window;
@@ -2563,7 +2609,7 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
        local_irq_restore(flags);

        trace_sched_reset_all_window_stats(window_start, window_size,
-               sched_clock() - start_ts, reason, old, new);
+               sched_ktime_clock() - start_ts, reason, old, new);
 }

 #ifdef CONFIG_SCHED_FREQ_INPUT
@@ -2586,7 +2632,7 @@ unsigned long sched_get_busy(int cpu)
         * that the window stats are current by doing an update.
         */
        raw_spin_lock_irqsave(&rq->lock, flags);
-       update_task_ravg(rq->curr, rq, TASK_UPDATE, sched_clock(), 0);
+       update_task_ravg(rq->curr, rq, TASK_UPDATE, sched_ktime_clock(), 0);
        load = rq->old_busy_time = rq->prev_runnable_sum;

        /*
@@ -2628,7 +2674,7 @@ void sched_set_io_is_busy(int val)

 int sched_set_window(u64 window_start, unsigned int window_size)
 {
-       u64 now, cur_jiffies, jiffy_sched_clock;
+       u64 now, cur_jiffies, jiffy_ktime_ns;
        s64 ws;
        unsigned long flags;

@@ -2638,23 +2684,25 @@ int sched_set_window(u64 window_start, unsigned int window_size)

        mutex_lock(&policy_mutex);

-       /* Get a consistent view of sched_clock, jiffies, and the time
-        * since the last jiffy (based on last_jiffies_update). */
+       /*
+        * Get a consistent view of ktime, jiffies, and the time
+        * since the last jiffy (based on last_jiffies_update).
+        */
        local_irq_save(flags);
-       cur_jiffies = jiffy_to_sched_clock(&now, &jiffy_sched_clock);
+       cur_jiffies = jiffy_to_ktime_ns(&now, &jiffy_ktime_ns);
        local_irq_restore(flags);

        /* translate window_start from jiffies to nanoseconds */
        ws = (window_start - cur_jiffies); /* jiffy difference */
        ws *= TICK_NSEC;
-       ws += jiffy_sched_clock;
+       ws += jiffy_ktime_ns;

        /* roll back calculated window start so that it is in
         * the past (window stats must have a current window) */
        while (ws > now)
                ws -= (window_size * TICK_NSEC);

-       BUG_ON(sched_clock() < ws);
+       BUG_ON(sched_ktime_clock() < ws);

        reset_all_window_stats(ws, window_size);

@@ -2679,7 +2727,7 @@ static void fixup_busy_time(struct task_struct *p, int new_cpu)

        if (sched_disable_window_stats)
                goto done;

-       wallclock = sched_clock();
+       wallclock = sched_ktime_clock();

        update_task_ravg(task_rq(p)->curr, task_rq(p), TASK_UPDATE,
@@ -3004,7 +3052,8 @@ static int cpufreq_notifier_trans(struct notifier_block *nb,
        for_each_cpu_mask(i, cluster->cpus) {
                struct rq *rq = cpu_rq(i);
                raw_spin_lock_irqsave(&rq->lock, flags);
-               update_task_ravg(rq->curr, rq, TASK_UPDATE, sched_clock(), 0);
+               update_task_ravg(rq->curr, rq, TASK_UPDATE,
+                                sched_ktime_clock(), 0);
                raw_spin_unlock_irqrestore(&rq->lock, flags);
        }

@@ -3069,7 +3118,8 @@ static inline int update_preferred_cluster(struct related_thread_group *grp,
         * has passed since we last updated preference
         */
        if (abs(new_load - old_load) > sched_grp_min_task_load_delta ||
-               sched_clock() - p->grp->last_update > sched_ravg_window)
+               sched_ktime_clock() - p->grp->last_update >
+                                               sched_ravg_window)
                return 1;

        return 0;
@@ -3147,7 +3197,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)

        trace_sched_migrate_task(p, new_cpu, pct_task_load(p));

-       note_run_start(p, sched_clock());
+       note_run_start(p, sched_ktime_clock());

        if (task_cpu(p) != new_cpu) {
                struct task_migration_notifier tmn;
@@ -3687,7 +3737,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
        raw_spin_lock(&rq->lock);
        old_load = task_load(p);
        grp = task_related_thread_group(p);
-       wallclock = sched_clock();
+       wallclock = sched_ktime_clock();
        update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
        heavy_task = heavy_task_wakeup(p, rq, TASK_WAKE);
        update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
@@ -3782,7 +3832,7 @@ static void try_to_wake_up_local(struct task_struct *p)
                goto out;

        if (!p->on_rq) {
-               u64 wallclock = sched_clock();
+               u64 wallclock = sched_ktime_clock();

                update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
                update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
@@ -5146,7 +5196,7 @@ void scheduler_tick(void)
        update_rq_clock(rq);
        update_cpu_load_active(rq);
        curr->sched_class->task_tick(rq, curr, 0);
-       update_task_ravg(rq->curr, rq, TASK_UPDATE, sched_clock(), 0);
+       update_task_ravg(rq->curr, rq, TASK_UPDATE, sched_ktime_clock(), 0);
        raw_spin_unlock(&rq->lock);

        perf_event_task_tick();
@@ -5422,7 +5472,7 @@ need_resched:

        put_prev_task(rq, prev);
        next = pick_next_task(rq);
-       wallclock = sched_clock();
+       wallclock = sched_ktime_clock();
        if (!prev->on_rq)
                task_note_last_sleep(prev, wallclock);
        update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bf2f287ffdaf..aa49b075564e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3109,7 +3109,7 @@ static inline int is_cpu_throttling_imminent(int cpu)

 static inline int is_task_migration_throttled(struct task_struct *p)
 {
-       u64 delta = sched_clock() - p->run_start;
+       u64 delta = sched_ktime_clock() - p->run_start;

        return delta < sched_min_runtime;
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 426e9e5f1257..879fac99e34c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -45,32 +45,15 @@ DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
  */
 static ktime_t last_jiffies_update;

-/*
- * Conversion from ktime to sched_clock is error prone. Use this
- * as a safetly margin when calculating the sched_clock value at
- * a particular jiffy as last_jiffies_update uses ktime.
- */
-#define SCHED_CLOCK_MARGIN 100000
-
-static u64 ns_since_jiffy(void)
-{
-       ktime_t delta;
-
-       delta = ktime_sub(ktime_get(), last_jiffies_update);
-
-       return ktime_to_ns(delta);
-}
-
-u64 jiffy_to_sched_clock(u64 *now, u64 *jiffy_sched_clock)
+u64 jiffy_to_ktime_ns(u64 *now, u64 *jiffy_ktime_ns)
 {
        u64 cur_jiffies;
        unsigned long seq;

        do {
                seq = read_seqbegin(&jiffies_lock);
-               *now = sched_clock();
-               *jiffy_sched_clock = *now -
-                       (ns_since_jiffy() + SCHED_CLOCK_MARGIN);
+               *now = ktime_to_ns(ktime_get());
+               *jiffy_ktime_ns = ktime_to_ns(last_jiffies_update);
                cur_jiffies = get_jiffies_64();
        } while (read_seqretry(&jiffies_lock, seq));
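The jiffies-to-nanoseconds translation in sched_set_window() is the least obvious part of this change, so below is a small standalone sketch (not part of the patch) of the same arithmetic. TICK_NSEC, the jiffy count and the ktime values are made-up illustration values; in the kernel they come from jiffy_to_ktime_ns() under local_irq_save().

/*
 * Standalone sketch: mirrors the window_start translation done in
 * sched_set_window(). All numbers below are assumed for illustration.
 */
#include <stdio.h>
#include <stdint.h>

#define TICK_NSEC 10000000LL                    /* assume a 10 ms tick (HZ=100) */

int main(void)
{
        /* Assumed snapshot, as jiffy_to_ktime_ns() would return it */
        uint64_t cur_jiffies = 1000;                    /* jiffies at snapshot time */
        uint64_t jiffy_ktime_ns = 50000000000ULL;       /* ktime of last jiffy update */
        uint64_t now = jiffy_ktime_ns + 3000000ULL;     /* 3 ms into the current jiffy */

        /* Caller-supplied window parameters */
        uint64_t window_start = 1002;                   /* requested start, in jiffies */
        unsigned int window_size = 2;                   /* window length, in jiffies */

        /* Translate window_start from jiffies to ktime nanoseconds */
        int64_t ws = (int64_t)(window_start - cur_jiffies);     /* jiffy difference */
        ws *= TICK_NSEC;
        ws += jiffy_ktime_ns;

        /* Roll the start back until it lies in the past, as the patch does */
        while (ws > (int64_t)now)
                ws -= window_size * TICK_NSEC;

        printf("now          = %llu ns\n", (unsigned long long)now);
        printf("window start = %lld ns (%lld ms before now)\n",
               (long long)ws, (long long)(now - ws) / 1000000LL);
        return 0;
}

With these assumed numbers the requested start lies 2 jiffies in the future, so it is pulled back by one full window and lands on the previous jiffy boundary, 3 ms before "now", which is the condition the BUG_ON(sched_ktime_clock() < ws) check relies on.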