perfcounters: generalize the counter scheduler

Impact: clean up and refactor code

refactor the counter scheduler: separate out in/out functions and
introduce a counter-rotation function as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Ingo Molnar 2008-12-21 14:43:25 +01:00
parent 8fe91e61cd
commit 235c7fc7c5

View file

@ -111,11 +111,12 @@ static void __perf_counter_remove_from_context(void *info)
spin_lock(&ctx->lock);
if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
counter->hw_ops->disable(counter);
counter->state = PERF_COUNTER_STATE_INACTIVE;
counter->hw_ops->disable(counter);
ctx->nr_active--;
cpuctx->active_oncpu--;
counter->task = NULL;
counter->oncpu = -1;
}
ctx->nr_counters--;
@ -192,8 +193,36 @@ retry:
spin_unlock_irq(&ctx->lock);
}
static int
counter_sched_in(struct perf_counter *counter,
struct perf_cpu_context *cpuctx,
struct perf_counter_context *ctx,
int cpu)
{
if (counter->state == PERF_COUNTER_STATE_OFF)
return 0;
counter->state = PERF_COUNTER_STATE_ACTIVE;
counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */
/*
* The new state must be visible before we turn it on in the hardware:
*/
smp_wmb();
if (counter->hw_ops->enable(counter)) {
counter->state = PERF_COUNTER_STATE_INACTIVE;
counter->oncpu = -1;
return -EAGAIN;
}
cpuctx->active_oncpu++;
ctx->nr_active++;
return 0;
}
/*
* Cross CPU call to install and enable a preformance counter
* Cross CPU call to install and enable a performance counter
*/
static void __perf_install_in_context(void *info)
{
@ -220,22 +249,17 @@ static void __perf_install_in_context(void *info)
* counters on a global level. NOP for non NMI based counters.
*/
perf_flags = hw_perf_save_disable();
list_add_counter(counter, ctx);
hw_perf_restore(perf_flags);
list_add_counter(counter, ctx);
ctx->nr_counters++;
if (cpuctx->active_oncpu < perf_max_counters) {
counter->state = PERF_COUNTER_STATE_ACTIVE;
counter->oncpu = cpu;
ctx->nr_active++;
cpuctx->active_oncpu++;
counter->hw_ops->enable(counter);
}
counter_sched_in(counter, cpuctx, ctx, cpu);
if (!ctx->task && cpuctx->max_pertask)
cpuctx->max_pertask--;
hw_perf_restore(perf_flags);
spin_unlock(&ctx->lock);
curr_rq_unlock_irq_restore(&flags);
}
@ -302,8 +326,8 @@ counter_sched_out(struct perf_counter *counter,
if (counter->state != PERF_COUNTER_STATE_ACTIVE)
return;
counter->hw_ops->disable(counter);
counter->state = PERF_COUNTER_STATE_INACTIVE;
counter->hw_ops->disable(counter);
counter->oncpu = -1;
cpuctx->active_oncpu--;
@ -326,6 +350,22 @@ group_sched_out(struct perf_counter *group_counter,
counter_sched_out(counter, cpuctx, ctx);
}
void __perf_counter_sched_out(struct perf_counter_context *ctx,
struct perf_cpu_context *cpuctx)
{
struct perf_counter *counter;
if (likely(!ctx->nr_counters))
return;
spin_lock(&ctx->lock);
if (ctx->nr_active) {
list_for_each_entry(counter, &ctx->counter_list, list_entry)
group_sched_out(counter, cpuctx, ctx);
}
spin_unlock(&ctx->lock);
}
/*
* Called from scheduler to remove the counters of the current task,
* with interrupts disabled.
@ -341,39 +381,18 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
{
struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
struct perf_counter_context *ctx = &task->perf_counter_ctx;
struct perf_counter *counter;
if (likely(!cpuctx->task_ctx))
return;
spin_lock(&ctx->lock);
if (ctx->nr_active) {
list_for_each_entry(counter, &ctx->counter_list, list_entry)
group_sched_out(counter, cpuctx, ctx);
}
spin_unlock(&ctx->lock);
__perf_counter_sched_out(ctx, cpuctx);
cpuctx->task_ctx = NULL;
}
static int
counter_sched_in(struct perf_counter *counter,
struct perf_cpu_context *cpuctx,
struct perf_counter_context *ctx,
int cpu)
static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx)
{
if (counter->state == PERF_COUNTER_STATE_OFF)
return 0;
if (counter->hw_ops->enable(counter))
return -EAGAIN;
counter->state = PERF_COUNTER_STATE_ACTIVE;
counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */
cpuctx->active_oncpu++;
ctx->nr_active++;
return 0;
__perf_counter_sched_out(&cpuctx->ctx, cpuctx);
}
static int
@ -416,21 +435,10 @@ group_error:
return -EAGAIN;
}
/*
* Called from scheduler to add the counters of the current task
* with interrupts disabled.
*
* We restore the counter value and then enable it.
*
* This does not protect us against NMI, but enable()
* sets the enabled bit in the control field of counter _before_
* accessing the counter control register. If a NMI hits, then it will
* keep the counter running.
*/
void perf_counter_task_sched_in(struct task_struct *task, int cpu)
static void
__perf_counter_sched_in(struct perf_counter_context *ctx,
struct perf_cpu_context *cpuctx, int cpu)
{
struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
struct perf_counter_context *ctx = &task->perf_counter_ctx;
struct perf_counter *counter;
if (likely(!ctx->nr_counters))
@ -453,10 +461,35 @@ void perf_counter_task_sched_in(struct task_struct *task, int cpu)
break;
}
spin_unlock(&ctx->lock);
}
/*
* Called from scheduler to add the counters of the current task
* with interrupts disabled.
*
* We restore the counter value and then enable it.
*
* This does not protect us against NMI, but enable()
* sets the enabled bit in the control field of counter _before_
* accessing the counter control register. If a NMI hits, then it will
* keep the counter running.
*/
void perf_counter_task_sched_in(struct task_struct *task, int cpu)
{
struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
struct perf_counter_context *ctx = &task->perf_counter_ctx;
__perf_counter_sched_in(ctx, cpuctx, cpu);
cpuctx->task_ctx = ctx;
}
static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
{
struct perf_counter_context *ctx = &cpuctx->ctx;
__perf_counter_sched_in(ctx, cpuctx, cpu);
}
int perf_counter_task_disable(void)
{
struct task_struct *curr = current;
@ -514,6 +547,8 @@ int perf_counter_task_enable(void)
/* force the update of the task clock: */
__task_delta_exec(curr, 1);
perf_counter_task_sched_out(curr, cpu);
spin_lock(&ctx->lock);
/*
@ -538,19 +573,18 @@ int perf_counter_task_enable(void)
return 0;
}
void perf_counter_task_tick(struct task_struct *curr, int cpu)
/*
* Round-robin a context's counters:
*/
static void rotate_ctx(struct perf_counter_context *ctx)
{
struct perf_counter_context *ctx = &curr->perf_counter_ctx;
struct perf_counter *counter;
u64 perf_flags;
if (likely(!ctx->nr_counters))
if (!ctx->nr_counters)
return;
perf_counter_task_sched_out(curr, cpu);
spin_lock(&ctx->lock);
/*
* Rotate the first entry last (works just fine for group counters too):
*/
@ -563,7 +597,24 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
hw_perf_restore(perf_flags);
spin_unlock(&ctx->lock);
}
void perf_counter_task_tick(struct task_struct *curr, int cpu)
{
struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
struct perf_counter_context *ctx = &curr->perf_counter_ctx;
const int rotate_percpu = 0;
if (rotate_percpu)
perf_counter_cpu_sched_out(cpuctx);
perf_counter_task_sched_out(curr, cpu);
if (rotate_percpu)
rotate_ctx(&cpuctx->ctx);
rotate_ctx(ctx);
if (rotate_percpu)
perf_counter_cpu_sched_in(cpuctx, cpu);
perf_counter_task_sched_in(curr, cpu);
}
@ -905,8 +956,6 @@ static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
struct task_struct *curr = counter->task;
u64 delta;
WARN_ON_ONCE(counter->task != current);
delta = __task_delta_exec(curr, update);
return curr->se.sum_exec_runtime + delta;
@ -1160,6 +1209,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
counter->group_leader = group_leader;
counter->hw_ops = NULL;
counter->state = PERF_COUNTER_STATE_INACTIVE;
if (hw_event->disabled)
counter->state = PERF_COUNTER_STATE_OFF;
@ -1331,36 +1381,50 @@ __perf_counter_exit_task(struct task_struct *child,
{
struct perf_counter *parent_counter;
u64 parent_val, child_val;
unsigned long flags;
u64 perf_flags;
/*
* Disable and unlink this counter.
*
* Be careful about zapping the list - IRQ/NMI context
* could still be processing it:
* If we do not self-reap then we have to wait for the
* child task to unschedule (it will happen for sure),
* so that its counter is at its final count. (This
* condition triggers rarely - child tasks usually get
* off their CPU before the parent has a chance to
* get this far into the reaping action)
*/
curr_rq_lock_irq_save(&flags);
perf_flags = hw_perf_save_disable();
if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
if (child != current) {
wait_task_inactive(child, 0);
list_del_init(&child_counter->list_entry);
} else {
struct perf_cpu_context *cpuctx;
unsigned long flags;
u64 perf_flags;
/*
* Disable and unlink this counter.
*
* Be careful about zapping the list - IRQ/NMI context
* could still be processing it:
*/
curr_rq_lock_irq_save(&flags);
perf_flags = hw_perf_save_disable();
cpuctx = &__get_cpu_var(perf_cpu_context);
child_counter->hw_ops->disable(child_counter);
child_counter->state = PERF_COUNTER_STATE_INACTIVE;
child_counter->oncpu = -1;
if (child_counter->state == PERF_COUNTER_STATE_ACTIVE) {
child_counter->state = PERF_COUNTER_STATE_INACTIVE;
child_counter->hw_ops->disable(child_counter);
cpuctx->active_oncpu--;
child_ctx->nr_active--;
child_counter->oncpu = -1;
}
cpuctx->active_oncpu--;
child_ctx->nr_active--;
list_del_init(&child_counter->list_entry);
child_ctx->nr_counters--;
hw_perf_restore(perf_flags);
curr_rq_unlock_irq_restore(&flags);
}
list_del_init(&child_counter->list_entry);
hw_perf_restore(perf_flags);
curr_rq_unlock_irq_restore(&flags);
parent_counter = child_counter->parent;
/*
* It can happen that parent exits first, and has counters