android_kernel_samsung_msm8976/drivers/cpufreq/cpufreq_stats.c

763 lines
19 KiB
C
Raw Permalink Normal View History

/*
* drivers/cpufreq/cpufreq_stats.c
*
* Copyright (C) 2003-2004 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
* (C) 2004 Zou Nan hai <nanhai.zou@intel.com>.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include <linux/err.h>
#include <linux/of.h>
#include <linux/sched.h>
#include <asm/cputime.h>
static spinlock_t cpufreq_stats_lock;
struct cpufreq_stats {
unsigned int cpu;
unsigned int total_trans;
unsigned long long last_time;
unsigned int max_state;
unsigned int state_num;
unsigned int last_index;
u64 *time_in_state;
unsigned int *freq_table;
#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
unsigned int *trans_table;
#endif
};
struct all_cpufreq_stats {
unsigned int state_num;
cputime64_t *time_in_state;
unsigned int *freq_table;
};
struct cpufreq_power_stats {
unsigned int state_num;
unsigned int *curr;
unsigned int *freq_table;
};
struct all_freq_table {
unsigned int *freq_table;
unsigned int table_size;
};
static struct all_freq_table *all_freq_table;
static DEFINE_PER_CPU(struct all_cpufreq_stats *, all_cpufreq_stats);
static DEFINE_PER_CPU(struct cpufreq_stats *, cpufreq_stats_table);
static DEFINE_PER_CPU(struct cpufreq_power_stats *, cpufreq_power_stats);
struct cpufreq_stats_attribute {
struct attribute attr;
ssize_t(*show) (struct cpufreq_stats *, char *);
};
static int cpufreq_stats_update(unsigned int cpu)
{
struct cpufreq_stats *stat;
struct all_cpufreq_stats *all_stat;
unsigned long long cur_time;
cur_time = get_jiffies_64();
spin_lock(&cpufreq_stats_lock);
stat = per_cpu(cpufreq_stats_table, cpu);
all_stat = per_cpu(all_cpufreq_stats, cpu);
if (!stat) {
spin_unlock(&cpufreq_stats_lock);
return 0;
}
if (stat->time_in_state) {
stat->time_in_state[stat->last_index] +=
cur_time - stat->last_time;
if (all_stat)
all_stat->time_in_state[stat->last_index] +=
cur_time - stat->last_time;
}
stat->last_time = cur_time;
spin_unlock(&cpufreq_stats_lock);
return 0;
}
static ssize_t show_total_trans(struct cpufreq_policy *policy, char *buf)
{
struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu);
if (!stat)
return 0;
return sprintf(buf, "%d\n",
per_cpu(cpufreq_stats_table, stat->cpu)->total_trans);
}
static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf)
{
ssize_t len = 0;
int i;
struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu);
if (!stat)
return 0;
cpufreq_stats_update(stat->cpu);
for (i = 0; i < stat->state_num; i++) {
len += sprintf(buf + len, "%u %llu\n", stat->freq_table[i],
(unsigned long long)
cpufreq: Fix wrong time unit conversion The time spent by a CPU under a given frequency is stored in jiffies unit in the cpu var cpufreq_stats_table->time_in_state[i], i being the index of the frequency. This is what is displayed in the following file on the right column: cat /sys/devices/system/cpu/cpuX/cpufreq/stats/time_in_state 2301000 19835820 2300000 3172 [...] Now cpufreq converts this jiffies unit delta to clock_t before returning it to the user as in the above file. And that conversion is achieved using the API cputime64_to_clock_t(). Although it accidentally works on traditional tick based cputime accounting, where cputime_t maps directly to jiffies, it doesn't work with other types of cputime accounting such as CONFIG_VIRT_CPU_ACCOUNTING_* where cputime_t can map to nsecs or any granularity preffered by the architecture. For example we get a buggy zero delta on full dyntick configurations: cat /sys/devices/system/cpu/cpuX/cpufreq/stats/time_in_state 2301000 0 2300000 0 [...] Fix this with using the proper jiffies_64_t to clock_t conversion. Reported-and-tested-by: Carsten Emde <C.Emde@osadl.org> Signed-off-by: Andreas Schwab <schwab@linux-m68k.org> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Git-commit: a857c0b9e24e39fe5be82451b65377795f9538d8 Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git Signed-off-by: Junjie Wu <junjiew@codeaurora.org>
2013-09-07 16:35:08 +00:00
jiffies_64_to_clock_t(stat->time_in_state[i]));
}
return len;
}
static int get_index_all_cpufreq_stat(struct all_cpufreq_stats *all_stat,
unsigned int freq)
{
int i;
if (!all_stat)
return -1;
for (i = 0; i < all_stat->state_num; i++) {
if (all_stat->freq_table[i] == freq)
return i;
}
return -1;
}
void acct_update_power(struct task_struct *task, cputime_t cputime)
{
struct cpufreq_power_stats *powerstats;
struct cpufreq_stats *stats;
unsigned int cpu_num, curr;
if (!task)
return;
cpu_num = task_cpu(task);
powerstats = per_cpu(cpufreq_power_stats, cpu_num);
stats = per_cpu(cpufreq_stats_table, cpu_num);
if (!powerstats || !stats)
return;
curr = powerstats->curr[stats->last_index];
if (task->cpu_power != ULLONG_MAX)
task->cpu_power += curr * cputime_to_usecs(cputime);
}
EXPORT_SYMBOL_GPL(acct_update_power);
static ssize_t show_current_in_state(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
ssize_t len = 0;
unsigned int i, cpu;
struct cpufreq_power_stats *powerstats;
spin_lock(&cpufreq_stats_lock);
for_each_possible_cpu(cpu) {
powerstats = per_cpu(cpufreq_power_stats, cpu);
if (!powerstats)
continue;
len += scnprintf(buf + len, PAGE_SIZE - len, "CPU%d:", cpu);
for (i = 0; i < powerstats->state_num; i++)
len += scnprintf(buf + len, PAGE_SIZE - len,
"%d=%d ", powerstats->freq_table[i],
powerstats->curr[i]);
len += scnprintf(buf + len, PAGE_SIZE - len, "\n");
}
spin_unlock(&cpufreq_stats_lock);
return len;
}
static ssize_t show_all_time_in_state(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
ssize_t len = 0;
unsigned int i, cpu, freq, index;
struct all_cpufreq_stats *all_stat;
len += scnprintf(buf + len, PAGE_SIZE - len, "freq\t\t");
for_each_possible_cpu(cpu) {
len += scnprintf(buf + len, PAGE_SIZE - len, "cpu%d\t\t", cpu);
if (cpu_online(cpu))
cpufreq_stats_update(cpu);
}
if (!all_freq_table)
goto out;
for (i = 0; i < all_freq_table->table_size; i++) {
freq = all_freq_table->freq_table[i];
len += scnprintf(buf + len, PAGE_SIZE - len, "\n%u\t\t", freq);
for_each_possible_cpu(cpu) {
all_stat = per_cpu(all_cpufreq_stats, cpu);
index = get_index_all_cpufreq_stat(all_stat, freq);
if (index != -1) {
len += scnprintf(buf + len, PAGE_SIZE - len,
"%llu\t\t", (unsigned long long)
cputime64_to_clock_t(all_stat->time_in_state[index]));
} else {
len += scnprintf(buf + len, PAGE_SIZE - len,
"N/A\t\t");
}
}
}
out:
len += scnprintf(buf + len, PAGE_SIZE - len, "\n");
return len;
}
#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf)
{
ssize_t len = 0;
int i, j;
struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu);
if (!stat)
return 0;
cpufreq_stats_update(stat->cpu);
len += snprintf(buf + len, PAGE_SIZE - len, " From : To\n");
len += snprintf(buf + len, PAGE_SIZE - len, " : ");
for (i = 0; i < stat->state_num; i++) {
if (len >= PAGE_SIZE)
break;
len += snprintf(buf + len, PAGE_SIZE - len, "%9u ",
stat->freq_table[i]);
}
if (len >= PAGE_SIZE)
return PAGE_SIZE;
len += snprintf(buf + len, PAGE_SIZE - len, "\n");
for (i = 0; i < stat->state_num; i++) {
if (len >= PAGE_SIZE)
break;
len += snprintf(buf + len, PAGE_SIZE - len, "%9u: ",
stat->freq_table[i]);
for (j = 0; j < stat->state_num; j++) {
if (len >= PAGE_SIZE)
break;
len += snprintf(buf + len, PAGE_SIZE - len, "%9u ",
stat->trans_table[i*stat->max_state+j]);
}
if (len >= PAGE_SIZE)
break;
len += snprintf(buf + len, PAGE_SIZE - len, "\n");
}
if (len >= PAGE_SIZE)
return PAGE_SIZE;
return len;
}
cpufreq_freq_attr_ro(trans_table);
#endif
cpufreq_freq_attr_ro(total_trans);
cpufreq_freq_attr_ro(time_in_state);
static struct attribute *default_attrs[] = {
&total_trans.attr,
&time_in_state.attr,
#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
&trans_table.attr,
#endif
NULL
};
static struct attribute_group stats_attr_group = {
.attrs = default_attrs,
.name = "stats"
};
static struct kobj_attribute _attr_all_time_in_state = __ATTR(all_time_in_state,
0444, show_all_time_in_state, NULL);
static struct kobj_attribute _attr_current_in_state = __ATTR(current_in_state,
0444, show_current_in_state, NULL);
static int freq_table_get_index(struct cpufreq_stats *stat, unsigned int freq)
{
int index;
for (index = 0; index < stat->max_state; index++)
if (stat->freq_table[index] == freq)
return index;
return -1;
}
static void __cpufreq_stats_free_table(struct cpufreq_policy *policy)
{
struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu);
if (!stat)
return;
pr_debug("%s: Free stat table\n", __func__);
sysfs_remove_group(&policy->kobj, &stats_attr_group);
kfree(stat->time_in_state);
kfree(stat);
per_cpu(cpufreq_stats_table, policy->cpu) = NULL;
}
static void cpufreq_stats_free_table(unsigned int cpu)
{
struct cpufreq_policy *policy;
policy = cpufreq_cpu_get(cpu);
if (!policy)
return;
if (cpufreq_frequency_get_table(policy->cpu))
__cpufreq_stats_free_table(policy);
cpufreq_cpu_put(policy);
}
static void cpufreq_allstats_free(void)
{
int cpu;
struct all_cpufreq_stats *all_stat;
sysfs_remove_file(cpufreq_global_kobject,
&_attr_all_time_in_state.attr);
for_each_possible_cpu(cpu) {
all_stat = per_cpu(all_cpufreq_stats, cpu);
if (!all_stat)
continue;
kfree(all_stat->time_in_state);
kfree(all_stat);
per_cpu(all_cpufreq_stats, cpu) = NULL;
}
if (all_freq_table) {
kfree(all_freq_table->freq_table);
kfree(all_freq_table);
all_freq_table = NULL;
}
}
static void cpufreq_powerstats_free(void)
{
int cpu;
struct cpufreq_power_stats *powerstats;
sysfs_remove_file(cpufreq_global_kobject, &_attr_current_in_state.attr);
for_each_possible_cpu(cpu) {
powerstats = per_cpu(cpufreq_power_stats, cpu);
if (!powerstats)
continue;
kfree(powerstats->curr);
kfree(powerstats);
per_cpu(cpufreq_power_stats, cpu) = NULL;
}
}
static int __cpufreq_stats_create_table(struct cpufreq_policy *policy,
struct cpufreq_frequency_table *table, int count)
{
unsigned int i, j, ret = 0;
struct cpufreq_stats *stat;
unsigned int alloc_size;
unsigned int cpu = policy->cpu;
if (per_cpu(cpufreq_stats_table, cpu))
return -EBUSY;
stat = kzalloc(sizeof(*stat), GFP_KERNEL);
if ((stat) == NULL) {
pr_err("Failed to alloc cpufreq_stats table\n");
return -ENOMEM;
}
ret = sysfs_create_group(&policy->kobj, &stats_attr_group);
if (ret) {
pr_err("Failed to create cpufreq_stats sysfs\n");
goto error_out;
}
stat->cpu = cpu;
per_cpu(cpufreq_stats_table, cpu) = stat;
alloc_size = count * sizeof(int) + count * sizeof(u64);
#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
alloc_size += count * count * sizeof(int);
#endif
stat->max_state = count;
stat->time_in_state = kzalloc(alloc_size, GFP_KERNEL);
if (!stat->time_in_state) {
ret = -ENOMEM;
pr_err("Failed to alloc cpufreq_stats table\n");
goto error_alloc;
}
stat->freq_table = (unsigned int *)(stat->time_in_state + count);
#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
stat->trans_table = stat->freq_table + count;
#endif
j = 0;
for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
unsigned int freq = table[i].frequency;
if (freq == CPUFREQ_ENTRY_INVALID)
continue;
if (freq_table_get_index(stat, freq) == -1)
stat->freq_table[j++] = freq;
}
stat->state_num = j;
spin_lock(&cpufreq_stats_lock);
stat->last_time = get_jiffies_64();
stat->last_index = freq_table_get_index(stat, policy->cur);
spin_unlock(&cpufreq_stats_lock);
return 0;
error_alloc:
sysfs_remove_group(&policy->kobj, &stats_attr_group);
error_out:
kfree(stat);
per_cpu(cpufreq_stats_table, cpu) = NULL;
return ret;
}
static void cpufreq_stats_update_policy_cpu(struct cpufreq_policy *policy)
{
struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table,
policy->last_cpu);
if (!stat)
return;
pr_debug("Updating stats_table for new_cpu %u from last_cpu %u\n",
policy->cpu, policy->last_cpu);
per_cpu(cpufreq_stats_table, policy->cpu) = per_cpu(cpufreq_stats_table,
policy->last_cpu);
per_cpu(cpufreq_stats_table, policy->last_cpu) = NULL;
stat->cpu = policy->cpu;
}
static void cpufreq_powerstats_create(unsigned int cpu,
struct cpufreq_frequency_table *table, int count) {
unsigned int alloc_size, i = 0, j = 0, ret = 0;
struct cpufreq_power_stats *powerstats;
struct device_node *cpu_node;
char device_path[16];
powerstats = kzalloc(sizeof(struct cpufreq_power_stats),
GFP_KERNEL);
if (!powerstats)
return;
/* Allocate memory for freq table per cpu as well as clockticks per
* freq*/
alloc_size = count * sizeof(unsigned int) +
count * sizeof(unsigned int);
powerstats->curr = kzalloc(alloc_size, GFP_KERNEL);
if (!powerstats->curr) {
kfree(powerstats);
return;
}
powerstats->freq_table = powerstats->curr + count;
spin_lock(&cpufreq_stats_lock);
for (i = 0; table[i].frequency != CPUFREQ_TABLE_END && j < count; i++) {
unsigned int freq = table[i].frequency;
if (freq == CPUFREQ_ENTRY_INVALID)
continue;
powerstats->freq_table[j++] = freq;
}
powerstats->state_num = j;
snprintf(device_path, sizeof(device_path), "/cpus/cpu@%d", cpu);
cpu_node = of_find_node_by_path(device_path);
if (cpu_node) {
ret = of_property_read_u32_array(cpu_node, "current",
powerstats->curr, count);
if (ret) {
kfree(powerstats->curr);
kfree(powerstats);
powerstats = NULL;
}
}
per_cpu(cpufreq_power_stats, cpu) = powerstats;
spin_unlock(&cpufreq_stats_lock);
}
static int compare_for_sort(const void *lhs_ptr, const void *rhs_ptr)
{
unsigned int lhs = *(const unsigned int *)(lhs_ptr);
unsigned int rhs = *(const unsigned int *)(rhs_ptr);
if (lhs < rhs)
return -1;
if (lhs > rhs)
return 1;
return 0;
}
static bool check_all_freq_table(unsigned int freq)
{
int i;
for (i = 0; i < all_freq_table->table_size; i++) {
if (freq == all_freq_table->freq_table[i])
return true;
}
return false;
}
static void create_all_freq_table(void)
{
all_freq_table = kzalloc(sizeof(struct all_freq_table),
GFP_KERNEL);
if (!all_freq_table)
pr_warn("could not allocate memory for all_freq_table\n");
return;
}
static void free_all_freq_table(void)
{
if (all_freq_table) {
kfree(all_freq_table->freq_table);
all_freq_table->freq_table = NULL;
kfree(all_freq_table);
all_freq_table = NULL;
}
}
static void add_all_freq_table(unsigned int freq)
{
unsigned int size;
size = sizeof(unsigned int) * (all_freq_table->table_size + 1);
all_freq_table->freq_table = krealloc(all_freq_table->freq_table,
cpufreq: fix sleeping in atomic context when realloc freq_table for all_time_in_state Commit c24a1d58a1a0 ("cpufreq: Persist cpufreq time in state data across hotplug") causes the following call trace to be spit on boot: BUG: sleeping function called from invalid context at mm/slub.c:936 in_atomic(): 1, irqs_disabled(): 0, pid: 1, name: swapper/0 CPU: 6 PID: 1 Comm: swapper/0 Not tainted 3.10.9-20140624.172707-eng-gd6c0f69-dirty #50 Backtrace: [<c0012270>] (dump_backtrace+0x0/0x10c) from [<c001256c>] (show_stack+0x18/0x1c) r6:ffff1788 r5:c0c020c0 r4:e609c000 r3:00000000 [<c0012554>] (show_stack+0x0/0x1c) from [<c07a2970>] (dump_stack+0x20/0x28) [<c07a2950>] (dump_stack+0x0/0x28) from [<c0057678>] (__might_sleep+0x104/0x120) [<c0057574>] (__might_sleep+0x0/0x120) from [<c00ff000>] (__kmalloc_track_caller+0x144/0x274) r6:00000000 r5:e609c000 r4:e6802140 [<c00feebc>] (__kmalloc_track_caller+0x0/0x274) from [<c00da098>] (krealloc+0x58/0xb0) [<c00da040>] (krealloc+0x0/0xb0) from [<c050266c>] (cpufreq_allstats_create+0x120/0x204) r8:e4c4ff00 r7:c0d266b8 r6:0013d620 r5:e4c4e600 r4:00000001 r3:e535d6d0 [<c050254c>] (cpufreq_allstats_create+0x0/0x204) from [<c0502e38>] (cpufreq_stat_notifier_policy+0xb8/0xd0) [<c0502d80>] (cpufreq_stat_notifier_policy+0x0/0xd0) from [<c00517cc>] (notifier_call_chain+0x4c/0x8c) r5:00000000 r4:fffffffe [<c0051780>] (notifier_call_chain+0x0/0x8c) from [<c00519fc>] (__blocking_notifier_call_chain+0x50/0x68) r8:c0cd4d00 r7:00000002 r6:e609dd7c r5:ffffffff r4:c0d25a4c r3:ffffffff [<c00519ac>] (__blocking_notifier_call_chain+0x0/0x68) from [<c0051a34>] (blocking_notifier_call_chain+0x20/0x28) r7:c0e24f30 r6:00000000 r5:e53e1e00 r4:e609dd7c [<c0051a14>] (blocking_notifier_call_chain+0x0/0x28) from [<c0500fec>] (__cpufreq_set_policy+0xc0/0x1d0) [<c0500f2c>] (__cpufreq_set_policy+0x0/0x1d0) from [<c0501308>] (cpufreq_add_dev_interface+0x20c/0x270) r7:00000008 r6:00000000 r5:e53e1e00 r4:e53e1e58 [<c05010fc>] (cpufreq_add_dev_interface+0x0/0x270) from [<c05016a8>] (cpufreq_add_dev+0x33c/0x420) [<c050136c>] (cpufreq_add_dev+0x0/0x420) from [<c03604a4>] (subsys_interface_register+0x80/0xbc) [<c0360424>] (subsys_interface_register+0x0/0xbc) from [<c050035c>] (cpufreq_register_driver+0x8c/0x194) Change-Id: If77a656d0ea60a8fc4083283d104509fa6c07f8f Signed-off-by: Minsung Kim <ms925.kim@samsung.com> Git-commit: d7052164059dd0447cc8f00a2692161a81644417 Git-repo: https://android.googlesource.com/kernel/msm/ Signed-off-by: Nirmal Abraham <nabrah@codeaurora.org> Signed-off-by: Avaneesh Kumar Dwivedi <akdwived@codeaurora.org>
2014-06-25 10:44:50 +00:00
size, GFP_ATOMIC);
if (IS_ERR(all_freq_table->freq_table)) {
pr_warn("Could not reallocate memory for freq_table\n");
all_freq_table->freq_table = NULL;
return;
}
all_freq_table->freq_table[all_freq_table->table_size++] = freq;
}
static void cpufreq_allstats_create(unsigned int cpu,
struct cpufreq_frequency_table *table, int count)
{
int i , j = 0;
unsigned int alloc_size;
struct all_cpufreq_stats *all_stat;
bool sort_needed = false;
all_stat = kzalloc(sizeof(struct all_cpufreq_stats),
GFP_KERNEL);
if (!all_stat) {
pr_warn("Cannot allocate memory for cpufreq stats\n");
return;
}
/*Allocate memory for freq table per cpu as well as clockticks per freq*/
alloc_size = count * sizeof(int) + count * sizeof(cputime64_t);
all_stat->time_in_state = kzalloc(alloc_size, GFP_KERNEL);
if (!all_stat->time_in_state) {
pr_warn("Cannot allocate memory for cpufreq time_in_state\n");
kfree(all_stat);
all_stat = NULL;
return;
}
all_stat->freq_table = (unsigned int *)
(all_stat->time_in_state + count);
spin_lock(&cpufreq_stats_lock);
for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
unsigned int freq = table[i].frequency;
if (freq == CPUFREQ_ENTRY_INVALID)
continue;
all_stat->freq_table[j++] = freq;
if (all_freq_table && !check_all_freq_table(freq)) {
add_all_freq_table(freq);
sort_needed = true;
}
}
if (sort_needed)
sort(all_freq_table->freq_table, all_freq_table->table_size,
sizeof(unsigned int), &compare_for_sort, NULL);
all_stat->state_num = j;
per_cpu(all_cpufreq_stats, cpu) = all_stat;
spin_unlock(&cpufreq_stats_lock);
}
static int cpufreq_stat_notifier_policy(struct notifier_block *nb,
unsigned long val, void *data)
{
int ret = 0, count = 0, i;
struct cpufreq_policy *policy = data;
struct cpufreq_frequency_table *table;
unsigned int cpu_num, cpu = policy->cpu;
if (val == CPUFREQ_UPDATE_POLICY_CPU) {
cpufreq_stats_update_policy_cpu(policy);
return 0;
}
table = cpufreq_frequency_get_table(cpu);
if (!table)
return 0;
cpufreq: stats: handle cpufreq_unregister_driver() and suspend/resume properly There are several problems with cpufreq stats in the way it handles cpufreq_unregister_driver() and suspend/resume.. - We must not lose data collected so far when suspend/resume happens and so stats directories must not be removed/allocated during these operations, which is done currently. - cpufreq_stat has registered notifiers with both cpufreq and hotplug. It adds sysfs stats directory with a cpufreq notifier: CPUFREQ_NOTIFY and removes this directory with a notifier from hotplug core. In case cpufreq_unregister_driver() is called (on rmmod cpufreq driver), stats directories per cpu aren't removed as CPUs are still online. The only call cpufreq_stats gets is cpufreq_stats_update_policy_cpu() for all CPUs except the last of each policy. And pointer to stat information is stored in the entry for last CPU in the per-cpu cpufreq_stats_table. But policy structure would be freed inside cpufreq core and so that will result in memory leak inside cpufreq stats (as we are never freeing memory for stats). Now if we again insert the module cpufreq_register_driver() will be called and we will again allocate stats data and put it on for first CPU of every policy. In case we only have a single CPU per policy, we will return with a error from cpufreq_stats_create_table() due to this code: if (per_cpu(cpufreq_stats_table, cpu)) return -EBUSY; And so probably cpufreq stats directory would not show up anymore (as it was added inside last policies->kobj which doesn't exist anymore). I haven't tested it, though. Also the values in stats files wouldn't be refreshed as we are using the earlier stats structure. - CPUFREQ_NOTIFY is called from cpufreq_set_policy() which is called for scenarios where we don't really want cpufreq_stat_notifier_policy() to get called. For example whenever we are changing anything related to a policy: min/max/current freq, etc. cpufreq_set_policy() is called and so cpufreq stats is notified. Where we don't do any useful stuff other than simply returning with -EBUSY from cpufreq_stats_create_table(). And so this isn't the right notifier that cpufreq stats.. Due to all above reasons this patch does following changes: - Add new notifiers CPUFREQ_CREATE_POLICY and CPUFREQ_REMOVE_POLICY, which are only called when policy is created/destroyed. They aren't called for suspend/resume paths.. - Use these notifiers in cpufreq_stat_notifier_policy() to create/destory stats sysfs entries. And so cpufreq_unregister_driver() or suspend/resume shouldn't be a problem for cpufreq_stats. - Return early from cpufreq_stat_cpu_callback() for suspend/resume sequence, so that we don't free stats structure. Change-Id: I03340f9fe3f79cb4df3ef94e94b9ff573c1df422 Acked-by: Nicolas Pitre <nico@linaro.org> Tested-by: Nicolas Pitre <nico@linaro.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Git-commit: fcd7af917abba798cd954419030142e95139359f Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git Signed-off-by: Junjie Wu <junjiew@codeaurora.org>
2014-01-07 01:40:10 +00:00
for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
unsigned int freq = table[i].frequency;
if (freq == CPUFREQ_ENTRY_INVALID)
continue;
count++;
}
if (!per_cpu(all_cpufreq_stats, cpu))
cpufreq_allstats_create(cpu, table, count);
for_each_possible_cpu(cpu_num) {
if (!per_cpu(cpufreq_power_stats, cpu_num))
cpufreq_powerstats_create(cpu_num, table, count);
}
cpufreq: stats: handle cpufreq_unregister_driver() and suspend/resume properly There are several problems with cpufreq stats in the way it handles cpufreq_unregister_driver() and suspend/resume.. - We must not lose data collected so far when suspend/resume happens and so stats directories must not be removed/allocated during these operations, which is done currently. - cpufreq_stat has registered notifiers with both cpufreq and hotplug. It adds sysfs stats directory with a cpufreq notifier: CPUFREQ_NOTIFY and removes this directory with a notifier from hotplug core. In case cpufreq_unregister_driver() is called (on rmmod cpufreq driver), stats directories per cpu aren't removed as CPUs are still online. The only call cpufreq_stats gets is cpufreq_stats_update_policy_cpu() for all CPUs except the last of each policy. And pointer to stat information is stored in the entry for last CPU in the per-cpu cpufreq_stats_table. But policy structure would be freed inside cpufreq core and so that will result in memory leak inside cpufreq stats (as we are never freeing memory for stats). Now if we again insert the module cpufreq_register_driver() will be called and we will again allocate stats data and put it on for first CPU of every policy. In case we only have a single CPU per policy, we will return with a error from cpufreq_stats_create_table() due to this code: if (per_cpu(cpufreq_stats_table, cpu)) return -EBUSY; And so probably cpufreq stats directory would not show up anymore (as it was added inside last policies->kobj which doesn't exist anymore). I haven't tested it, though. Also the values in stats files wouldn't be refreshed as we are using the earlier stats structure. - CPUFREQ_NOTIFY is called from cpufreq_set_policy() which is called for scenarios where we don't really want cpufreq_stat_notifier_policy() to get called. For example whenever we are changing anything related to a policy: min/max/current freq, etc. cpufreq_set_policy() is called and so cpufreq stats is notified. Where we don't do any useful stuff other than simply returning with -EBUSY from cpufreq_stats_create_table(). And so this isn't the right notifier that cpufreq stats.. Due to all above reasons this patch does following changes: - Add new notifiers CPUFREQ_CREATE_POLICY and CPUFREQ_REMOVE_POLICY, which are only called when policy is created/destroyed. They aren't called for suspend/resume paths.. - Use these notifiers in cpufreq_stat_notifier_policy() to create/destory stats sysfs entries. And so cpufreq_unregister_driver() or suspend/resume shouldn't be a problem for cpufreq_stats. - Return early from cpufreq_stat_cpu_callback() for suspend/resume sequence, so that we don't free stats structure. Change-Id: I03340f9fe3f79cb4df3ef94e94b9ff573c1df422 Acked-by: Nicolas Pitre <nico@linaro.org> Tested-by: Nicolas Pitre <nico@linaro.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Git-commit: fcd7af917abba798cd954419030142e95139359f Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git Signed-off-by: Junjie Wu <junjiew@codeaurora.org>
2014-01-07 01:40:10 +00:00
if (val == CPUFREQ_CREATE_POLICY)
ret = __cpufreq_stats_create_table(policy, table, count);
else if (val == CPUFREQ_REMOVE_POLICY)
__cpufreq_stats_free_table(policy);
cpufreq: stats: handle cpufreq_unregister_driver() and suspend/resume properly There are several problems with cpufreq stats in the way it handles cpufreq_unregister_driver() and suspend/resume.. - We must not lose data collected so far when suspend/resume happens and so stats directories must not be removed/allocated during these operations, which is done currently. - cpufreq_stat has registered notifiers with both cpufreq and hotplug. It adds sysfs stats directory with a cpufreq notifier: CPUFREQ_NOTIFY and removes this directory with a notifier from hotplug core. In case cpufreq_unregister_driver() is called (on rmmod cpufreq driver), stats directories per cpu aren't removed as CPUs are still online. The only call cpufreq_stats gets is cpufreq_stats_update_policy_cpu() for all CPUs except the last of each policy. And pointer to stat information is stored in the entry for last CPU in the per-cpu cpufreq_stats_table. But policy structure would be freed inside cpufreq core and so that will result in memory leak inside cpufreq stats (as we are never freeing memory for stats). Now if we again insert the module cpufreq_register_driver() will be called and we will again allocate stats data and put it on for first CPU of every policy. In case we only have a single CPU per policy, we will return with a error from cpufreq_stats_create_table() due to this code: if (per_cpu(cpufreq_stats_table, cpu)) return -EBUSY; And so probably cpufreq stats directory would not show up anymore (as it was added inside last policies->kobj which doesn't exist anymore). I haven't tested it, though. Also the values in stats files wouldn't be refreshed as we are using the earlier stats structure. - CPUFREQ_NOTIFY is called from cpufreq_set_policy() which is called for scenarios where we don't really want cpufreq_stat_notifier_policy() to get called. For example whenever we are changing anything related to a policy: min/max/current freq, etc. cpufreq_set_policy() is called and so cpufreq stats is notified. Where we don't do any useful stuff other than simply returning with -EBUSY from cpufreq_stats_create_table(). And so this isn't the right notifier that cpufreq stats.. Due to all above reasons this patch does following changes: - Add new notifiers CPUFREQ_CREATE_POLICY and CPUFREQ_REMOVE_POLICY, which are only called when policy is created/destroyed. They aren't called for suspend/resume paths.. - Use these notifiers in cpufreq_stat_notifier_policy() to create/destory stats sysfs entries. And so cpufreq_unregister_driver() or suspend/resume shouldn't be a problem for cpufreq_stats. - Return early from cpufreq_stat_cpu_callback() for suspend/resume sequence, so that we don't free stats structure. Change-Id: I03340f9fe3f79cb4df3ef94e94b9ff573c1df422 Acked-by: Nicolas Pitre <nico@linaro.org> Tested-by: Nicolas Pitre <nico@linaro.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Git-commit: fcd7af917abba798cd954419030142e95139359f Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git Signed-off-by: Junjie Wu <junjiew@codeaurora.org>
2014-01-07 01:40:10 +00:00
return ret;
}
static void cpufreq_stats_create_table(unsigned int cpu)
{
struct cpufreq_policy *policy;
struct cpufreq_frequency_table *table;
int i, cpu_num, count = 0;
/*
* "likely(!policy)" because normally cpufreq_stats will be registered
* before cpufreq driver
*/
policy = cpufreq_cpu_get(cpu);
if (likely(!policy))
return;
table = cpufreq_frequency_get_table(policy->cpu);
if (likely(table)) {
for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
unsigned int freq = table[i].frequency;
if (freq != CPUFREQ_ENTRY_INVALID)
count++;
}
if (!per_cpu(all_cpufreq_stats, cpu))
cpufreq_allstats_create(cpu, table, count);
for_each_possible_cpu(cpu_num) {
if (!per_cpu(cpufreq_power_stats, cpu))
cpufreq_powerstats_create(cpu, table, count);
}
__cpufreq_stats_create_table(policy, table, count);
}
cpufreq_cpu_put(policy);
}
static int cpufreq_stat_notifier_trans(struct notifier_block *nb,
unsigned long val, void *data)
{
struct cpufreq_freqs *freq = data;
struct cpufreq_stats *stat;
int old_index, new_index;
if (val != CPUFREQ_POSTCHANGE)
return 0;
stat = per_cpu(cpufreq_stats_table, freq->cpu);
if (!stat)
return 0;
old_index = stat->last_index;
new_index = freq_table_get_index(stat, freq->new);
[CPUFREQ] Don't set stat->last_index to -1 if the pol->cur has incorrect value. If the driver submitted an non-existing pol>cur value (say it used the default initialized value of zero), when the cpufreq stats tries to setup its initial values it incorrectly sets stat->last_index to -1 (or 0xfffff...). And cpufreq_stats_update tries to update at that index location and fails. This can be caused by: stat->last_index = freq_table_get_index(stat, policy->cur); not finding the appropiate frequency in the table (b/c the policy->cur is wrong) and we end up crashing. The fix however is concentrated in the 'cpufreq_stats_update' as the last_index (and old_index) are updated there. Which means it can reset the last_index to -1 again and on the next iteration cause a crash. Without this patch, the following crash is observed: powernow-k8: Found 1 AMD Athlon(tm) 64 Processor 3700+ (1 cpu cores) (version 2.20.00) powernow-k8: fid 0x2 (1000 MHz), vid 0x12 powernow-k8: fid 0xa (1800 MHz), vid 0xa powernow-k8: fid 0xc (2000 MHz), vid 0x8 powernow-k8: fid 0xe (2200 MHz), vid 0x8 Marking TSC unstable due to cpufreq changes powernow-k8: fid trans failed, fid 0x2, curr 0x0 BUG: unable to handle kernel paging request at ffff880807e07b78 IP: [<ffffffff81479163>] cpufreq_stats_update+0x46/0x5b .. snip.. Pid: 1, comm: swapper Not tainted 3.0.0-rc2 #45 MICRO-STAR INTERNATIONAL CO., LTD MS-7094/MS-7094 ..snip.. Call Trace: [<ffffffff81479248>] cpufreq_stat_notifier_trans+0x48/0x7c [<ffffffff81095d68>] notifier_call_chain+0x32/0x5e [<ffffffff81095e6b>] __srcu_notifier_call_chain+0x47/0x63 [<ffffffff81095e96>] srcu_notifier_call_chain+0xf/0x11 [<ffffffff81477e7a>] cpufreq_notify_transition+0x111/0x134 [<ffffffff8147b0d4>] powernowk8_target+0x53b/0x617 [<ffffffff8147723a>] __cpufreq_driver_target+0x2e/0x30 [<ffffffff8147a127>] cpufreq_governor_dbs+0x339/0x356 [<ffffffff81477394>] __cpufreq_governor+0xa8/0xe9 [<ffffffff81477525>] __cpufreq_set_policy+0x132/0x13e [<ffffffff8147848d>] cpufreq_add_dev_interface+0x272/0x28c Reported-by: Tobias Diedrich <ranma+xen@tdiedrich.de> Tested-by: Tobias Diedrich <ranma+xen@tdiedrich.de> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Signed-off-by: Dave Jones <davej@redhat.com>
2011-06-16 19:36:38 +00:00
/* We can't do stat->time_in_state[-1]= .. */
if (old_index == -1 || new_index == -1)
return 0;
[CPUFREQ] Don't set stat->last_index to -1 if the pol->cur has incorrect value. If the driver submitted an non-existing pol>cur value (say it used the default initialized value of zero), when the cpufreq stats tries to setup its initial values it incorrectly sets stat->last_index to -1 (or 0xfffff...). And cpufreq_stats_update tries to update at that index location and fails. This can be caused by: stat->last_index = freq_table_get_index(stat, policy->cur); not finding the appropiate frequency in the table (b/c the policy->cur is wrong) and we end up crashing. The fix however is concentrated in the 'cpufreq_stats_update' as the last_index (and old_index) are updated there. Which means it can reset the last_index to -1 again and on the next iteration cause a crash. Without this patch, the following crash is observed: powernow-k8: Found 1 AMD Athlon(tm) 64 Processor 3700+ (1 cpu cores) (version 2.20.00) powernow-k8: fid 0x2 (1000 MHz), vid 0x12 powernow-k8: fid 0xa (1800 MHz), vid 0xa powernow-k8: fid 0xc (2000 MHz), vid 0x8 powernow-k8: fid 0xe (2200 MHz), vid 0x8 Marking TSC unstable due to cpufreq changes powernow-k8: fid trans failed, fid 0x2, curr 0x0 BUG: unable to handle kernel paging request at ffff880807e07b78 IP: [<ffffffff81479163>] cpufreq_stats_update+0x46/0x5b .. snip.. Pid: 1, comm: swapper Not tainted 3.0.0-rc2 #45 MICRO-STAR INTERNATIONAL CO., LTD MS-7094/MS-7094 ..snip.. Call Trace: [<ffffffff81479248>] cpufreq_stat_notifier_trans+0x48/0x7c [<ffffffff81095d68>] notifier_call_chain+0x32/0x5e [<ffffffff81095e6b>] __srcu_notifier_call_chain+0x47/0x63 [<ffffffff81095e96>] srcu_notifier_call_chain+0xf/0x11 [<ffffffff81477e7a>] cpufreq_notify_transition+0x111/0x134 [<ffffffff8147b0d4>] powernowk8_target+0x53b/0x617 [<ffffffff8147723a>] __cpufreq_driver_target+0x2e/0x30 [<ffffffff8147a127>] cpufreq_governor_dbs+0x339/0x356 [<ffffffff81477394>] __cpufreq_governor+0xa8/0xe9 [<ffffffff81477525>] __cpufreq_set_policy+0x132/0x13e [<ffffffff8147848d>] cpufreq_add_dev_interface+0x272/0x28c Reported-by: Tobias Diedrich <ranma+xen@tdiedrich.de> Tested-by: Tobias Diedrich <ranma+xen@tdiedrich.de> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Signed-off-by: Dave Jones <davej@redhat.com>
2011-06-16 19:36:38 +00:00
cpufreq_stats_update(freq->cpu);
if (old_index == new_index)
return 0;
spin_lock(&cpufreq_stats_lock);
stat->last_index = new_index;
#ifdef CONFIG_CPU_FREQ_STAT_DETAILS
stat->trans_table[old_index * stat->max_state + new_index]++;
#endif
stat->total_trans++;
spin_unlock(&cpufreq_stats_lock);
return 0;
}
static struct notifier_block notifier_policy_block = {
.notifier_call = cpufreq_stat_notifier_policy
};
static struct notifier_block notifier_trans_block = {
.notifier_call = cpufreq_stat_notifier_trans
};
static int __init cpufreq_stats_init(void)
{
int ret;
unsigned int cpu;
[PATCH] create and destroy cpufreq sysfs entries based on cpu notifiers cpufreq entries in sysfs should only be populated when CPU is online state. When we either boot with maxcpus=x and then boot the other cpus by echoing to sysfs online file, these entries should be created and destroyed when CPU_DEAD is notified. Same treatement as cache entries under sysfs. We place the processor in the lowest frequency, so hw managed P-State transitions can still work on the other threads to save power. Primary goal was to just make these directories appear/disapper dynamically. There is one in this patch i had to do, which i really dont like myself but probably best if someone handling the cpufreq infrastructure could give this code right treatment if this is not acceptable. I guess its probably good for the first cut. - Converting lock_cpu_hotplug()/unlock_cpu_hotplug() to disable/enable preempt. The locking was smack in the middle of the notification path, when the hotplug is already holding the lock. I tried another solution to avoid this so avoid taking locks if we know we are from notification path. The solution was getting very ugly and i decided this was probably good for this iteration until someone who understands cpufreq could do a better job than me. (akpm: export cpucontrol to GPL modules: drivers/cpufreq/cpufreq_stats.c now does lock_cpu_hotplug()) Signed-off-by: Ashok Raj <ashok.raj@intel.com> Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> Cc: Dave Jones <davej@codemonkey.org.uk> Cc: Zwane Mwaikambo <zwane@holomorphy.com> Cc: Greg KH <greg@kroah.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-10-30 22:59:54 +00:00
spin_lock_init(&cpufreq_stats_lock);
ret = cpufreq_register_notifier(&notifier_policy_block,
CPUFREQ_POLICY_NOTIFIER);
if (ret)
return ret;
create_all_freq_table();
for_each_online_cpu(cpu)
cpufreq_stats_create_table(cpu);
ret = cpufreq_register_notifier(&notifier_trans_block,
CPUFREQ_TRANSITION_NOTIFIER);
if (ret) {
cpufreq_unregister_notifier(&notifier_policy_block,
CPUFREQ_POLICY_NOTIFIER);
for_each_online_cpu(cpu)
cpufreq_stats_free_table(cpu);
free_all_freq_table();
return ret;
}
ret = cpufreq_sysfs_create_file(&_attr_all_time_in_state.attr);
if (ret)
pr_warn("Cannot create sysfs file for cpufreq stats\n");
ret = cpufreq_sysfs_create_file(&_attr_current_in_state.attr);
if (ret)
pr_warn("Cannot create sysfs file for cpufreq current stats\n");
return 0;
}
static void __exit cpufreq_stats_exit(void)
{
unsigned int cpu;
[PATCH] create and destroy cpufreq sysfs entries based on cpu notifiers cpufreq entries in sysfs should only be populated when CPU is online state. When we either boot with maxcpus=x and then boot the other cpus by echoing to sysfs online file, these entries should be created and destroyed when CPU_DEAD is notified. Same treatement as cache entries under sysfs. We place the processor in the lowest frequency, so hw managed P-State transitions can still work on the other threads to save power. Primary goal was to just make these directories appear/disapper dynamically. There is one in this patch i had to do, which i really dont like myself but probably best if someone handling the cpufreq infrastructure could give this code right treatment if this is not acceptable. I guess its probably good for the first cut. - Converting lock_cpu_hotplug()/unlock_cpu_hotplug() to disable/enable preempt. The locking was smack in the middle of the notification path, when the hotplug is already holding the lock. I tried another solution to avoid this so avoid taking locks if we know we are from notification path. The solution was getting very ugly and i decided this was probably good for this iteration until someone who understands cpufreq could do a better job than me. (akpm: export cpucontrol to GPL modules: drivers/cpufreq/cpufreq_stats.c now does lock_cpu_hotplug()) Signed-off-by: Ashok Raj <ashok.raj@intel.com> Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> Cc: Dave Jones <davej@codemonkey.org.uk> Cc: Zwane Mwaikambo <zwane@holomorphy.com> Cc: Greg KH <greg@kroah.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-10-30 22:59:54 +00:00
cpufreq_unregister_notifier(&notifier_policy_block,
CPUFREQ_POLICY_NOTIFIER);
cpufreq_unregister_notifier(&notifier_trans_block,
CPUFREQ_TRANSITION_NOTIFIER);
for_each_online_cpu(cpu)
cpufreq_stats_free_table(cpu);
cpufreq_allstats_free();
cpufreq_powerstats_free();
}
MODULE_AUTHOR("Zou Nan hai <nanhai.zou@intel.com>");
MODULE_DESCRIPTION("'cpufreq_stats' - A driver to export cpufreq stats "
"through sysfs filesystem");
MODULE_LICENSE("GPL");
module_init(cpufreq_stats_init);
module_exit(cpufreq_stats_exit);