From 2228768885e0b92c0f7b276cc61b8974e7aed724 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 3 May 2013 11:16:18 -0400 Subject: [PATCH 01/18] ring-buffer: Select IRQ_WORK As the wake up logic for waiters on the buffer has been moved from the tracing code to the ring buffer, the ring buffer also requires selecting IRQ_WORK, as the wake up code is performed via irq_work. This fixes compile breakage when a user of the ring buffer is selected but tracing and irq_work are not. Link: http://lkml.kernel.org/r/20130503115332.GT8356@rric.localhost Cc: Arnd Bergmann Reported-by: Robert Richter Signed-off-by: Steven Rostedt --- kernel/trace/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 5e9efd4b83a4..015f85aaca08 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -71,6 +71,7 @@ config TRACE_CLOCK config RING_BUFFER bool select TRACE_CLOCK + select IRQ_WORK config FTRACE_NMI_ENTER bool @@ -107,7 +108,6 @@ config TRACING select BINARY_PRINTF select EVENT_TRACING select TRACE_CLOCK - select IRQ_WORK config GENERIC_TRACER bool From a5b85bd1557209b4ef18a8cf07e60a1ca3132468 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 9 May 2013 14:44:14 +0900 Subject: [PATCH 02/18] tracing: Don't succeed if event_enable_func did not register anything Have event_enable_func() return 0 on success and an error code if it failed or did not register any functions. Currently it returns the number of functions it registered, and if it registered nothing it returns 0, which is treated as success. This also fixes the value seen by user space: on success, the number of enabled functions was passed back through ftrace_regex_write() as the write() return code, so if only one function was enabled, the write() returned one, which can confuse a user program into thinking it only wrote 1 byte. Link: http://lkml.kernel.org/r/20130509054413.30398.55650.stgit@mhiramat-M0-7522 Cc: Srikar Dronamraju Cc: Oleg Nesterov Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Tom Zanussi Signed-off-by: Masami Hiramatsu [ Rewrote change log to reflect that this fixes two bugs - SR ] Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 53582e982e51..44ac83614c3d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2061,8 +2061,11 @@ event_enable_func(struct ftrace_hash *hash, if (ret < 0) goto out_put; ret = register_ftrace_function_probe(glob, ops, data); - if (!ret) + if (!ret) { + ret = -ENOENT; goto out_disable; + } else + ret = 0; out: mutex_unlock(&event_mutex); return ret; From ff305ded9ff83436039a16d31bc558dc6598d7ce Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 9 May 2013 11:30:26 -0400 Subject: [PATCH 03/18] tracing: Return error if register_ftrace_function_probe() fails for event_enable_func() register_ftrace_function_probe() returns the number of functions it registered, which can be zero; it can also return a negative number if something went wrong. But event_enable_func() only checks for the case that it didn't register anything; it also needs to check for the case that something went wrong and return that error code as well. Also added some comments to make the code more understandable.
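For clarity, the intended return-value handling in event_enable_func() reduces to the following pattern (a distilled sketch of the hunk applied below, not new code):

        ret = register_ftrace_function_probe(glob, ops, data);
        if (!ret) {
                /* matched no functions: treat that as an error */
                ret = -ENOENT;
                goto out_disable;
        } else if (ret < 0) {
                /* registration itself failed: pass the error up */
                goto out_disable;
        }
        /* success: report 0, not the number of enabled functions */
        ret = 0;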
Cc: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 44ac83614c3d..87e826f1c237 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2061,11 +2061,18 @@ event_enable_func(struct ftrace_hash *hash, if (ret < 0) goto out_put; ret = register_ftrace_function_probe(glob, ops, data); + /* + * The above returns on success the # of functions enabled, + * but if it didn't find any functions it returns zero. + * Consider no functions a failure too. + */ if (!ret) { ret = -ENOENT; goto out_disable; - } else - ret = 0; + } else if (ret < 0) + goto out_disable; + /* Just return zero, not the number of enabled functions */ + ret = 0; out: mutex_unlock(&event_mutex); return ret; From 7c088b5120ffef017e2ddc38f992277e96436ef6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 9 May 2013 11:35:12 -0400 Subject: [PATCH 04/18] ftrace: Have ftrace_regex_write() return either read or error ftrace_regex_write() reads the result of ftrace_process_regex(), which can legitimately return a positive number, so only consider it a failure if the return value is negative. Otherwise, it may skip other registered probes, and by returning a positive number that was not the number of bytes read, it will confuse the user process doing the writing. Cc: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8a5c017bb50c..d85a0ad81a67 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3276,7 +3276,7 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, ret = ftrace_process_regex(iter->hash, parser->buffer, parser->idx, enable); trace_parser_clear(parser); - if (ret) + if (ret < 0) goto out_unlock; } From f04f24fb7e48d446bd89a01c6056571f25972511 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 9 May 2013 14:44:17 +0900 Subject: [PATCH 05/18] ftrace, kprobes: Fix a deadlock on ftrace_regex_lock Fix a deadlock on ftrace_regex_lock which happens when setting an enable_event trigger on a dynamic kprobe event, as below. ---- sh-2.05b# echo p vfs_symlink > kprobe_events sh-2.05b# echo vfs_symlink:enable_event:kprobes:p_vfs_symlink_0 > set_ftrace_filter ============================================= [ INFO: possible recursive locking detected ] 3.9.0+ #35 Not tainted --------------------------------------------- sh/72 is trying to acquire lock: (ftrace_regex_lock){+.+.+.}, at: [] ftrace_set_hash+0x81/0x1f0 but task is already holding lock: (ftrace_regex_lock){+.+.+.}, at: [] ftrace_regex_write.isra.29.part.30+0x3d/0x220 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(ftrace_regex_lock); lock(ftrace_regex_lock); *** DEADLOCK *** ---- To fix this, introduce a finer-grained regex_lock in each ftrace_ops. ftrace_regex_lock was too coarse a lock, protecting all filter/notrace_hash operations, but it no longer needs to be global now that multiple ftrace_ops are supported, because each ftrace_ops has its own filter/notrace_hash. Link: http://lkml.kernel.org/r/20130509054417.30398.84254.stgit@mhiramat-M0-7522 Cc: Srikar Dronamraju Cc: Oleg Nesterov Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Tom Zanussi Signed-off-by: Masami Hiramatsu [ Added an initialization flag and automated mutex initialization for non-ftrace.c ftrace_probes.
] Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 4 +++ kernel/trace/ftrace.c | 73 ++++++++++++++++++++++++++++++------------ 2 files changed, 56 insertions(+), 21 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f83e17a40e8b..99d0fbcbaf79 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -90,6 +90,8 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, * not set this, then the ftrace infrastructure will add recursion * protection for the caller. * STUB - The ftrace_ops is just a place holder. + * INITIALIZED - The ftrace_ops has already been initialized (first use time + * register_ftrace_function() is called, it will initialized the ops) */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -100,6 +102,7 @@ enum { FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 5, FTRACE_OPS_FL_RECURSION_SAFE = 1 << 6, FTRACE_OPS_FL_STUB = 1 << 7, + FTRACE_OPS_FL_INITIALIZED = 1 << 8, }; struct ftrace_ops { @@ -110,6 +113,7 @@ struct ftrace_ops { #ifdef CONFIG_DYNAMIC_FTRACE struct ftrace_hash *notrace_hash; struct ftrace_hash *filter_hash; + struct mutex regex_lock; #endif }; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d85a0ad81a67..827f2fe7bc3f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -64,6 +64,13 @@ #define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL) +#ifdef CONFIG_DYNAMIC_FTRACE +#define INIT_REGEX_LOCK(opsname) \ + .regex_lock = __MUTEX_INITIALIZER(opsname.regex_lock), +#else +#define INIT_REGEX_LOCK(opsname) +#endif + static struct ftrace_ops ftrace_list_end __read_mostly = { .func = ftrace_stub, .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB, @@ -131,6 +138,16 @@ static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); while (likely(op = rcu_dereference_raw((op)->next)) && \ unlikely((op) != &ftrace_list_end)) +static inline void ftrace_ops_init(struct ftrace_ops *ops) +{ +#ifdef CONFIG_DYNAMIC_FTRACE + if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) { + mutex_init(&ops->regex_lock); + ops->flags |= FTRACE_OPS_FL_INITIALIZED; + } +#endif +} + /** * ftrace_nr_registered_ops - return number of ops registered * @@ -907,7 +924,8 @@ static void unregister_ftrace_profiler(void) #else static struct ftrace_ops ftrace_profile_ops __read_mostly = { .func = function_profile_call, - .flags = FTRACE_OPS_FL_RECURSION_SAFE, + .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, + INIT_REGEX_LOCK(ftrace_profile_ops) }; static int register_ftrace_profiler(void) @@ -1103,11 +1121,10 @@ static struct ftrace_ops global_ops = { .func = ftrace_stub, .notrace_hash = EMPTY_HASH, .filter_hash = EMPTY_HASH, - .flags = FTRACE_OPS_FL_RECURSION_SAFE, + .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, + INIT_REGEX_LOCK(global_ops) }; -static DEFINE_MUTEX(ftrace_regex_lock); - struct ftrace_page { struct ftrace_page *next; struct dyn_ftrace *records; @@ -1247,6 +1264,7 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash) void ftrace_free_filter(struct ftrace_ops *ops) { + ftrace_ops_init(ops); free_ftrace_hash(ops->filter_hash); free_ftrace_hash(ops->notrace_hash); } @@ -2624,6 +2642,8 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, struct ftrace_hash *hash; int ret = 0; + ftrace_ops_init(ops); + if (unlikely(ftrace_disabled)) return -ENODEV; @@ -2656,7 +2676,7 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, } } - mutex_lock(&ftrace_regex_lock); + mutex_lock(&ops->regex_lock); if ((file->f_mode & 
FMODE_WRITE) && (file->f_flags & O_TRUNC)) @@ -2677,7 +2697,7 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, } } else file->private_data = iter; - mutex_unlock(&ftrace_regex_lock); + mutex_unlock(&ops->regex_lock); return ret; } @@ -2910,6 +2930,8 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, static struct ftrace_ops trace_probe_ops __read_mostly = { .func = function_trace_probe_call, + .flags = FTRACE_OPS_FL_INITIALIZED, + INIT_REGEX_LOCK(trace_probe_ops) }; static int ftrace_probe_registered; @@ -3256,18 +3278,18 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, if (!cnt) return 0; - mutex_lock(&ftrace_regex_lock); - - ret = -ENODEV; - if (unlikely(ftrace_disabled)) - goto out_unlock; - if (file->f_mode & FMODE_READ) { struct seq_file *m = file->private_data; iter = m->private; } else iter = file->private_data; + mutex_lock(&iter->ops->regex_lock); + + ret = -ENODEV; + if (unlikely(ftrace_disabled)) + goto out_unlock; + parser = &iter->parser; read = trace_get_user(parser, ubuf, cnt, ppos); @@ -3282,7 +3304,7 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, ret = read; out_unlock: - mutex_unlock(&ftrace_regex_lock); + mutex_unlock(&iter->ops->regex_lock); return ret; } @@ -3344,7 +3366,7 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, if (!hash) return -ENOMEM; - mutex_lock(&ftrace_regex_lock); + mutex_lock(&ops->regex_lock); if (reset) ftrace_filter_reset(hash); if (buf && !ftrace_match_records(hash, buf, len)) { @@ -3366,7 +3388,7 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, mutex_unlock(&ftrace_lock); out_regex_unlock: - mutex_unlock(&ftrace_regex_lock); + mutex_unlock(&ops->regex_lock); free_ftrace_hash(hash); return ret; @@ -3392,6 +3414,7 @@ ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove, int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, int remove, int reset) { + ftrace_ops_init(ops); return ftrace_set_addr(ops, ip, remove, reset, 1); } EXPORT_SYMBOL_GPL(ftrace_set_filter_ip); @@ -3416,6 +3439,7 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset) { + ftrace_ops_init(ops); return ftrace_set_regex(ops, buf, len, reset, 1); } EXPORT_SYMBOL_GPL(ftrace_set_filter); @@ -3434,6 +3458,7 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter); int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, int len, int reset) { + ftrace_ops_init(ops); return ftrace_set_regex(ops, buf, len, reset, 0); } EXPORT_SYMBOL_GPL(ftrace_set_notrace); @@ -3524,6 +3549,8 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable) { char *func; + ftrace_ops_init(ops); + while (buf) { func = strsep(&buf, ","); ftrace_set_regex(ops, func, strlen(func), 0, enable); @@ -3551,14 +3578,14 @@ int ftrace_regex_release(struct inode *inode, struct file *file) int filter_hash; int ret; - mutex_lock(&ftrace_regex_lock); if (file->f_mode & FMODE_READ) { iter = m->private; - seq_release(inode, file); } else iter = file->private_data; + mutex_lock(&iter->ops->regex_lock); + parser = &iter->parser; if (trace_parser_loaded(parser)) { parser->buffer[parser->idx] = 0; @@ -3587,7 +3614,7 @@ int ftrace_regex_release(struct inode *inode, struct file *file) free_ftrace_hash(iter->hash); kfree(iter); - mutex_unlock(&ftrace_regex_lock); + mutex_unlock(&iter->ops->regex_lock); return 0; } @@ -4126,7 +4153,8 @@ void __init ftrace_init(void) static struct 
ftrace_ops global_ops = { .func = ftrace_stub, - .flags = FTRACE_OPS_FL_RECURSION_SAFE, + .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, + INIT_REGEX_LOCK(global_ops) }; static int __init ftrace_nodyn_init(void) @@ -4180,8 +4208,9 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, } static struct ftrace_ops control_ops = { - .func = ftrace_ops_control_func, - .flags = FTRACE_OPS_FL_RECURSION_SAFE, + .func = ftrace_ops_control_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, + INIT_REGEX_LOCK(control_ops) }; static inline void @@ -4539,6 +4568,8 @@ int register_ftrace_function(struct ftrace_ops *ops) { int ret = -1; + ftrace_ops_init(ops); + mutex_lock(&ftrace_lock); ret = __register_ftrace_function(ops); From 3f2367ba7cbf13ec0f3f1e93b833a7eacd1ab4b8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 9 May 2013 14:44:21 +0900 Subject: [PATCH 06/18] ftrace: Cleanup regex_lock and ftrace_lock around hash updating Clean up the regex_lock and ftrace_lock locking points around the ftrace_ops hash update code. The new rule is that regex_lock protects the ops->*_hash read-update-write code for each ftrace_ops. Usually, a hash update is done by the following sequence. 1. allocate a new local hash and copy the original hash. 2. update the local hash. 3. move (actually, copy) back the local hash to the ftrace_ops. 4. update the ftrace entries if needed. 5. release the local hash. This makes regex_lock protect #1-#4, and ftrace_lock protect #3 and #4 (the move functions update the entries too) as well as the adding and removing of ftrace_ops from the ftrace_ops_list. Link: http://lkml.kernel.org/r/20130509054421.30398.83411.stgit@mhiramat-M0-7522 Cc: Srikar Dronamraju Cc: Oleg Nesterov Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Tom Zanussi Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 59 +++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 827f2fe7bc3f..cacf0856191e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2656,28 +2656,26 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, return -ENOMEM; } + iter->ops = ops; + iter->flags = flag; + + mutex_lock(&ops->regex_lock); + if (flag & FTRACE_ITER_NOTRACE) hash = ops->notrace_hash; else hash = ops->filter_hash; - iter->ops = ops; - iter->flags = flag; - if (file->f_mode & FMODE_WRITE) { - mutex_lock(&ftrace_lock); iter->hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, hash); - mutex_unlock(&ftrace_lock); - if (!iter->hash) { trace_parser_put(&iter->parser); kfree(iter); - return -ENOMEM; + ret = -ENOMEM; + goto out_unlock; } } - mutex_lock(&ops->regex_lock); - if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) ftrace_filter_reset(iter->hash); @@ -2697,6 +2695,8 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, } } else file->private_data = iter; + + out_unlock: mutex_unlock(&ops->regex_lock); return ret; @@ -3012,7 +3012,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, if (WARN_ON(not)) return -EINVAL; - mutex_lock(&ftrace_lock); + mutex_lock(&trace_probe_ops.regex_lock); hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); if (!hash) { @@ -3070,14 +3070,16 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, } while_for_each_ftrace_rec(); + mutex_lock(&ftrace_lock); ret = ftrace_hash_move(&trace_probe_ops, 1,
orig_hash, hash); if (ret < 0) count = ret; __enable_ftrace_function_probe(); + mutex_unlock(&ftrace_lock); out_unlock: - mutex_unlock(&ftrace_lock); + mutex_unlock(&trace_probe_ops.regex_lock); free_ftrace_hash(hash); return count; @@ -3117,7 +3119,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, return; } - mutex_lock(&ftrace_lock); + mutex_lock(&trace_probe_ops.regex_lock); hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); if (!hash) @@ -3155,6 +3157,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, list_add(&entry->free_list, &free_list); } } + mutex_lock(&ftrace_lock); __disable_ftrace_function_probe(); /* * Remove after the disable is called. Otherwise, if the last @@ -3166,9 +3169,10 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, list_del(&entry->free_list); ftrace_free_entry(entry); } + mutex_unlock(&ftrace_lock); out_unlock: - mutex_unlock(&ftrace_lock); + mutex_unlock(&trace_probe_ops.regex_lock); free_ftrace_hash(hash); } @@ -3284,11 +3288,10 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, } else iter = file->private_data; - mutex_lock(&iter->ops->regex_lock); - - ret = -ENODEV; if (unlikely(ftrace_disabled)) - goto out_unlock; + return -ENODEV; + + /* iter->hash is a local copy, so we don't need regex_lock */ parser = &iter->parser; read = trace_get_user(parser, ubuf, cnt, ppos); @@ -3299,13 +3302,11 @@ ftrace_regex_write(struct file *file, const char __user *ubuf, parser->idx, enable); trace_parser_clear(parser); if (ret < 0) - goto out_unlock; + goto out; } ret = read; -out_unlock: - mutex_unlock(&iter->ops->regex_lock); - + out: return ret; } @@ -3357,16 +3358,19 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, if (unlikely(ftrace_disabled)) return -ENODEV; + mutex_lock(&ops->regex_lock); + if (enable) orig_hash = &ops->filter_hash; else orig_hash = &ops->notrace_hash; hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); - if (!hash) - return -ENOMEM; + if (!hash) { + ret = -ENOMEM; + goto out_regex_unlock; + } - mutex_lock(&ops->regex_lock); if (reset) ftrace_filter_reset(hash); if (buf && !ftrace_match_records(hash, buf, len)) { @@ -3584,8 +3588,6 @@ int ftrace_regex_release(struct inode *inode, struct file *file) } else iter = file->private_data; - mutex_lock(&iter->ops->regex_lock); - parser = &iter->parser; if (trace_parser_loaded(parser)) { parser->buffer[parser->idx] = 0; @@ -3594,6 +3596,8 @@ int ftrace_regex_release(struct inode *inode, struct file *file) trace_parser_put(parser); + mutex_lock(&iter->ops->regex_lock); + if (file->f_mode & FMODE_WRITE) { filter_hash = !!(iter->flags & FTRACE_ITER_FILTER); @@ -3611,10 +3615,11 @@ int ftrace_regex_release(struct inode *inode, struct file *file) mutex_unlock(&ftrace_lock); } + + mutex_unlock(&iter->ops->regex_lock); free_ftrace_hash(iter->hash); kfree(iter); - mutex_unlock(&iter->ops->regex_lock); return 0; } From cce2c8f26704529f592fc124c7c6ad399940dc5d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 9 May 2013 14:44:24 +0900 Subject: [PATCH 07/18] tracing/kprobes: Fix to increment return event probe hit-count Fix to increment probe hit-count for function return event. 
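The counter feeds the per-probe hit statistics (the tracing directory's kprobe_profile file reports each probe's name with its hit and miss counts), so without this fix a return probe's hit count stayed at zero even while it was recording events. Conceptually the fix is a one-liner at the top of the return handler, exactly as in the hunk below:

        tp->nhit++;     /* count the hit before any recording is attempted */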
Link: http://lkml.kernel.org/r/20130509054424.30398.34058.stgit@mhiramat-M0-7522 Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Tom Zanussi Cc: Oleg Nesterov Cc: Srikar Dronamraju Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 1865d5f76538..69286337fd7e 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -767,6 +767,8 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri, unsigned long irq_flags; struct ftrace_event_call *call = &tp->call; + tp->nhit++; + local_save_flags(irq_flags); pc = preempt_count(); From 30052170dcc256c18a43fb3e76577a67394543f8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 9 May 2013 14:44:26 +0900 Subject: [PATCH 08/18] tracing: Indicate enabled soft-mode in enable file Indicate an event enabled in soft mode as "1*" in the "enable" file for each event, because such an event can be soft-disabled when a disable_event trigger is hit. Link: http://lkml.kernel.org/r/20130509054426.30398.28202.stgit@mhiramat-M0-7522 Cc: Srikar Dronamraju Cc: Oleg Nesterov Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Tom Zanussi Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 87e826f1c237..915c136d7bd1 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -623,6 +623,8 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, if (file->flags & FTRACE_EVENT_FL_ENABLED) { if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED) buf = "0*\n"; + else if (file->flags & FTRACE_EVENT_FL_SOFT_MODE) + buf = "1*\n"; else buf = "1\n"; } else From 1cf4c0732db3cd3c49cadbc60ff6bda08604e6fa Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 9 May 2013 14:44:29 +0900 Subject: [PATCH 09/18] tracing: Modify soft-mode only if there's no other referrer Modify the soft-mode flag only if there is no other soft-mode referrer (currently only the ftrace triggers), by using a reference counter in each ftrace_event_file. Without this fix, adding and removing several different enable/disable_event triggers on the same event clears the soft-mode bit from the ftrace_event_file. This also happens when the glob is mistyped while setting a trigger. e.g. # echo vfs_symlink:enable_event:net:netif_rx > set_ftrace_filter # cat events/net/netif_rx/enable 0* # echo typo_func:enable_event:net:netif_rx > set_ftrace_filter # cat events/net/netif_rx/enable 0 # cat set_ftrace_filter #### all functions enabled #### vfs_symlink:enable_event:net:netif_rx:unlimited As above, we still have a trigger, but soft-mode is gone. Link: http://lkml.kernel.org/r/20130509054429.30398.7464.stgit@mhiramat-M0-7522 Cc: Srikar Dronamraju Cc: Oleg Nesterov Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: David Sharp Cc: Hiraku Toyooka Cc: Tom Zanussi Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 1 + kernel/trace/trace_events.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 34e00fb49bec..4372658c73ae 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -293,6 +293,7 @@ struct ftrace_event_file { * caching and such.
Which is mostly OK ;-) */ unsigned long flags; + atomic_t sm_ref; /* soft-mode reference counter */ }; #define __TRACE_EVENT_FLAGS(name, value) \ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 915c136d7bd1..8be1224046f8 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -251,7 +251,8 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, switch (enable) { case 0: /* - * When soft_disable is set and enable is cleared, we want + * When soft_disable is set and enable is cleared, the sm_ref + * reference counter is decremented. If it reaches 0, we want * to clear the SOFT_DISABLED flag but leave the event in the * state that it was. That is, if the event was enabled and * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED @@ -263,6 +264,8 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, * "soft enable"s (clearing the SOFT_DISABLED bit) wont work. */ if (soft_disable) { + if (atomic_dec_return(&file->sm_ref) > 0) + break; disable = file->flags & FTRACE_EVENT_FL_SOFT_DISABLED; clear_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags); } else @@ -291,8 +294,11 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file, */ if (!soft_disable) clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags); - else + else { + if (atomic_inc_return(&file->sm_ref) > 1) + break; set_bit(FTRACE_EVENT_FL_SOFT_MODE_BIT, &file->flags); + } if (!(file->flags & FTRACE_EVENT_FL_ENABLED)) { @@ -1540,6 +1546,7 @@ __trace_add_new_event(struct ftrace_event_call *call, file->event_call = call; file->tr = tr; + atomic_set(&file->sm_ref, 0); list_add(&file->list, &tr->events); return event_create_dir(tr->event_dir, file, id, enable, filter, format); @@ -1562,6 +1569,7 @@ __trace_early_add_new_event(struct ftrace_event_call *call, file->event_call = call; file->tr = tr; + atomic_set(&file->sm_ref, 0); list_add(&file->list, &tr->events); return 0; From da511bf33e47ea1f33f4b672f7da166d2a1b8a91 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 9 May 2013 15:00:07 -0400 Subject: [PATCH 10/18] tracing: Add helper function trace_create_new_event() to remove duplicate code Both __trace_add_new_event() and __trace_early_add_new_event() do basically the same thing, except that __trace_add_new_event() does a little more. Instead of having duplicate code between the two functions, add a helper function trace_create_new_event() that both can use. This guards against a bug being fixed in one function but not the other.
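With the helper in place, each caller reduces to allocate-and-check (sketched from the diff below):

        file = trace_create_new_event(call, tr);
        if (!file)
                return -ENOMEM;

and any field that must be set on every new ftrace_event_file, such as the sm_ref counter added by the previous patch, is now initialized in exactly one place.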
Cc: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 8be1224046f8..7a0cf68027cc 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1529,6 +1529,24 @@ __register_event(struct ftrace_event_call *call, struct module *mod) return 0; } +static struct ftrace_event_file * +trace_create_new_event(struct ftrace_event_call *call, + struct trace_array *tr) +{ + struct ftrace_event_file *file; + + file = kmem_cache_alloc(file_cachep, GFP_TRACE); + if (!file) + return NULL; + + file->event_call = call; + file->tr = tr; + atomic_set(&file->sm_ref, 0); + list_add(&file->list, &tr->events); + + return file; +} + /* Add an event to a trace directory */ static int __trace_add_new_event(struct ftrace_event_call *call, @@ -1540,15 +1558,10 @@ __trace_add_new_event(struct ftrace_event_call *call, { struct ftrace_event_file *file; - file = kmem_cache_alloc(file_cachep, GFP_TRACE); + file = trace_create_new_event(call, tr); if (!file) return -ENOMEM; - file->event_call = call; - file->tr = tr; - atomic_set(&file->sm_ref, 0); - list_add(&file->list, &tr->events); - return event_create_dir(tr->event_dir, file, id, enable, filter, format); } @@ -1563,15 +1576,10 @@ __trace_early_add_new_event(struct ftrace_event_call *call, { struct ftrace_event_file *file; - file = kmem_cache_alloc(file_cachep, GFP_TRACE); + file = trace_create_new_event(call, tr); if (!file) return -ENOMEM; - file->event_call = call; - file->tr = tr; - atomic_set(&file->sm_ref, 0); - list_add(&file->list, &tr->events); - return 0; } From 5ae0bf5972b66d35e5674e1b7d855b1e111a68ae Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 9 May 2013 18:20:37 -0400 Subject: [PATCH 11/18] ftrace: Fix locking in register_ftrace_function_probe() The iteration of the ftrace function list and the call to ftrace_match_record() need to be protected by the ftrace_lock. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index cacf0856191e..f104c45cbcc1 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3017,14 +3017,16 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); if (!hash) { count = -ENOMEM; - goto out_unlock; + goto out; } if (unlikely(ftrace_disabled)) { count = -ENODEV; - goto out_unlock; + goto out; } + mutex_lock(&ftrace_lock); + do_for_each_ftrace_rec(pg, rec) { if (!ftrace_match_record(rec, NULL, search, len, type)) @@ -3070,15 +3072,15 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, } while_for_each_ftrace_rec(); - mutex_lock(&ftrace_lock); ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash); if (ret < 0) count = ret; __enable_ftrace_function_probe(); - mutex_unlock(&ftrace_lock); out_unlock: + mutex_unlock(&ftrace_lock); + out: mutex_unlock(&trace_probe_ops.regex_lock); free_ftrace_hash(hash); From 23ea9c4dda129fe1711f9fbda03c7a9c91cf1322 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 9 May 2013 19:31:48 -0400 Subject: [PATCH 12/18] ftrace: Fix the output of enabled_functions debug file The enabled_functions debugfs file was created to be able to see what functions have been modified from nops to calling a tracer. 
The current method uses the counter in the function record: when an ftrace_ops is registered to a function, the function's count increases. But that doesn't mean that the function is actively being traced. /proc/sys/kernel/ftrace_enabled can be set to zero, which would disable it, and something can also go wrong such that we think a function is enabled when only the counter is set. The record's FTRACE_FL_ENABLED flag is set or cleared when its function is modified. That is a much more accurate way of knowing whether a function is enabled or not. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f104c45cbcc1..dcca9fa29bf4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2459,7 +2459,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos) !ftrace_lookup_ip(ops->notrace_hash, rec->ip)) || ((iter->flags & FTRACE_ITER_ENABLED) && - !(rec->flags & ~FTRACE_FL_MASK))) { + !(rec->flags & FTRACE_FL_ENABLED))) { rec = NULL; goto retry; From 19dd603e45199d93d61e9853c596d098e04e5d66 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 9 May 2013 19:37:36 -0400 Subject: [PATCH 13/18] ftrace: Fix function probe when more than one probe is added When the first function probe is added and the function tracer is updated, the functions are modified to call the probe. But when a second probe is added, the function records are updated to include it, yet the actual functions themselves are not touched again. This prevents the second (or third or fourth and so on) probes from having their functions called. # echo vfs_symlink:enable_event:sched:sched_switch > set_ftrace_filter # echo vfs_unlink:enable_event:sched:sched_switch > set_ftrace_filter # cat trace # tracer: nop # # entries-in-buffer/entries-written: 0/0 #P:4 # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | # touch /tmp/a # rm /tmp/a # cat trace # tracer: nop # # entries-in-buffer/entries-written: 0/0 #P:4 # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | # ln -s /tmp/a # cat trace # tracer: nop # # entries-in-buffer/entries-written: 414/414 #P:4 # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | -0 [000] d..3 2847.923031: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=2786 next_prio=120 <...>-3114 [001] d..4 2847.923035: sched_switch: prev_comm=ln prev_pid=3114 prev_prio=120 prev_state=x ==> next_comm=swapper/1 next_pid=0 next_prio=120 bash-2786 [000] d..3 2847.923535: sched_switch: prev_comm=bash prev_pid=2786 prev_prio=120 prev_state=S ==> next_comm=kworker/0:1 next_pid=34 next_prio=120 kworker/0:1-34 [000] d..3 2847.923552: sched_switch: prev_comm=kworker/0:1 prev_pid=34 prev_prio=120 prev_state=S ==> next_comm=swapper/0 next_pid=0 next_prio=120 -0 [002] d..3 2847.923554: sched_switch: prev_comm=swapper/2 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=sshd next_pid=2783 next_prio=120 sshd-2783 [002] d..3 2847.923660: sched_switch: prev_comm=sshd prev_pid=2783 prev_prio=120 prev_state=S ==> next_comm=swapper/2 next_pid=0 next_prio=120 The functions still need to be updated even though
the probe itself does not need to be registered again when a new probe is added. Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index dcca9fa29bf4..b549b0f5b977 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2941,8 +2941,12 @@ static void __enable_ftrace_function_probe(void) int ret; int i; - if (ftrace_probe_registered) + if (ftrace_probe_registered) { + /* still need to update the function call sites */ + if (ftrace_enabled) + ftrace_run_update_code(FTRACE_UPDATE_CALLS); return; + } for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { struct hlist_head *hhd = &ftrace_func_hash[i]; From db02038f4e6a776fd3a0bb71242be37ff378ce86 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 9 May 2013 14:44:32 +0900 Subject: [PATCH 14/18] tracing/kprobes: Use bool for retprobe checker Use bool instead of int for the kretprobe checker. Link: http://lkml.kernel.org/r/20130509054431.30398.38561.stgit@mhiramat-M0-7522 Cc: Srikar Dronamraju Cc: Oleg Nesterov Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Tom Zanussi Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 69286337fd7e..0b7386a54b1e 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -46,7 +46,7 @@ struct trace_probe { (sizeof(struct probe_arg) * (n))) -static __kprobes int trace_probe_is_return(struct trace_probe *tp) +static __kprobes bool trace_probe_is_return(struct trace_probe *tp) { return tp->rp.handler != NULL; } From 48182bd2261766b810e4e4269a23236c1ace63fb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 9 May 2013 14:44:36 +0900 Subject: [PATCH 15/18] tracing/kprobes: Increment probe hit-count even if it is used by perf Increment the probe hit-count for profiling even when the probe is used only by the perf tool. The same has already been done in trace_uprobe.
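Conceptually the counter moves out of the trace-only handlers and up into the dispatchers, so a hit is counted once regardless of which consumers are attached (sketch of the resulting entry dispatcher, matching the diff below):

        int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
        {
                struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);

                tp->nhit++;     /* counted for ftrace and perf users alike */

                if (tp->flags & TP_FLAG_TRACE)
                        kprobe_trace_func(kp, regs);
        #ifdef CONFIG_PERF_EVENTS
                if (tp->flags & TP_FLAG_PROFILE)
                        kprobe_perf_func(kp, regs);
        #endif
                return 0;       /* we don't tweak the kernel, so just return 0 */
        }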
Link: http://lkml.kernel.org/r/20130509054436.30398.21133.stgit@mhiramat-M0-7522 Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Tom Zanussi Cc: Oleg Nesterov Cc: Srikar Dronamraju Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 0b7386a54b1e..6e86fbbae337 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -733,8 +733,6 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) unsigned long irq_flags; struct ftrace_event_call *call = &tp->call; - tp->nhit++; - local_save_flags(irq_flags); pc = preempt_count(); @@ -767,8 +765,6 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri, unsigned long irq_flags; struct ftrace_event_call *call = &tp->call; - tp->nhit++; - local_save_flags(irq_flags); pc = preempt_count(); @@ -1075,6 +1071,8 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) { struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); + tp->nhit++; + if (tp->flags & TP_FLAG_TRACE) kprobe_trace_func(kp, regs); #ifdef CONFIG_PERF_EVENTS @@ -1089,6 +1087,8 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) { struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); + tp->nhit++; + if (tp->flags & TP_FLAG_TRACE) kretprobe_trace_func(ri, regs); #ifdef CONFIG_PERF_EVENTS From 2b106aabe6c566ba19c352f22683381e1ea41326 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 9 May 2013 14:44:41 +0900 Subject: [PATCH 16/18] tracing/kprobes: Pass trace_probe directly from dispatcher Pass the struct trace_probe pointer directly from the probe dispatchers to the handlers. This removes the redundant container_of() macro uses. The same has already been done in trace_uprobe.
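For background, every handler previously had to recover the enclosing trace_probe from the embedded probe structure with the same container_of() idiom (illustrative, based on the struct trace_probe layout in trace_kprobe.c):

        /* rp.kp is the kprobe embedded inside struct trace_probe */
        struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);

        /* and for return probes, via the owning kretprobe */
        struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);

Doing the conversion once in each dispatcher and handing tp down removes that duplication.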
Link: http://lkml.kernel.org/r/20130509054441.30398.69112.stgit@mhiramat-M0-7522 Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Tom Zanussi Cc: Oleg Nesterov Cc: Srikar Dronamraju Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6e86fbbae337..9ca44fc3fb0b 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -723,9 +723,9 @@ static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp, } /* Kprobe handler */ -static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) +static __kprobes void +kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs) { - struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct kprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; @@ -745,7 +745,7 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) return; entry = ring_buffer_event_data(event); - entry->ip = (unsigned long)kp->addr; + entry->ip = (unsigned long)tp->rp.kp.addr; store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); if (!filter_current_check_discard(buffer, call, entry, event)) @@ -754,10 +754,10 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) } /* Kretprobe handler */ -static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri, - struct pt_regs *regs) +static __kprobes void +kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, + struct pt_regs *regs) { - struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct kretprobe_trace_entry_head *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; @@ -973,10 +973,9 @@ static int set_print_fmt(struct trace_probe *tp) #ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ -static __kprobes void kprobe_perf_func(struct kprobe *kp, - struct pt_regs *regs) +static __kprobes void +kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs) { - struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct ftrace_event_call *call = &tp->call; struct kprobe_trace_entry_head *entry; struct hlist_head *head; @@ -995,7 +994,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, if (!entry) return; - entry->ip = (unsigned long)kp->addr; + entry->ip = (unsigned long)tp->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize); @@ -1005,10 +1004,10 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, } /* Kretprobe profile handler */ -static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, - struct pt_regs *regs) +static __kprobes void +kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri, + struct pt_regs *regs) { - struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct ftrace_event_call *call = &tp->call; struct kretprobe_trace_entry_head *entry; struct hlist_head *head; @@ -1074,10 +1073,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) tp->nhit++; if (tp->flags & TP_FLAG_TRACE) - kprobe_trace_func(kp, regs); + kprobe_trace_func(tp, regs); #ifdef CONFIG_PERF_EVENTS if (tp->flags & TP_FLAG_PROFILE) - kprobe_perf_func(kp, regs); + kprobe_perf_func(tp, regs); #endif return 0; /* We don't tweek kernel, so just return 0 */ } @@ 
-1090,10 +1089,10 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) tp->nhit++; if (tp->flags & TP_FLAG_TRACE) - kretprobe_trace_func(ri, regs); + kretprobe_trace_func(tp, ri, regs); #ifdef CONFIG_PERF_EVENTS if (tp->flags & TP_FLAG_PROFILE) - kretprobe_perf_func(ri, regs); + kretprobe_perf_func(tp, ri, regs); #endif return 0; /* We don't tweek kernel, so just return 0 */ } From 41a7dd420c57323391d58b553318c1fad8e7ebc2 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 9 May 2013 14:44:49 +0900 Subject: [PATCH 17/18] tracing/kprobes: Support ftrace_event_file base multibuffer Support multi-buffer on kprobe-based dynamic events by using ftrace_event_file. Link: http://lkml.kernel.org/r/20130509054449.30398.88343.stgit@mhiramat-M0-7522 Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Tom Zanussi Cc: Oleg Nesterov Cc: Srikar Dronamraju Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 250 ++++++++++++++++++++++++++++++------ 1 file changed, 214 insertions(+), 36 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9ca44fc3fb0b..fee865d8a7c4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -27,7 +27,6 @@ /** * Kprobe event core functions */ - struct trace_probe { struct list_head list; struct kretprobe rp; /* Use rp.kp for kprobe use */ @@ -36,6 +35,7 @@ struct trace_probe { const char *symbol; /* symbol name */ struct ftrace_event_class class; struct ftrace_event_call call; + struct ftrace_event_file **files; ssize_t size; /* trace entry size */ unsigned int nr_args; struct probe_arg args[]; @@ -183,12 +183,57 @@ static struct trace_probe *find_trace_probe(const char *event, return NULL; } -/* Enable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */ -static int enable_trace_probe(struct trace_probe *tp, int flag) +static int trace_probe_nr_files(struct trace_probe *tp) +{ + struct ftrace_event_file **file = tp->files; + int ret = 0; + + if (file) + while (*(file++)) + ret++; + + return ret; +} + +static DEFINE_MUTEX(probe_enable_lock); + +/* + * Enable trace_probe + * if the file is NULL, enable "perf" handler, or enable "trace" handler. 
+ */ +static int +enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) { int ret = 0; - tp->flags |= flag; + mutex_lock(&probe_enable_lock); + + if (file) { + struct ftrace_event_file **new, **old = tp->files; + int n = trace_probe_nr_files(tp); + + /* 1 is for new one and 1 is for stopper */ + new = kzalloc((n + 2) * sizeof(struct ftrace_event_file *), + GFP_KERNEL); + if (!new) { + ret = -ENOMEM; + goto out_unlock; + } + memcpy(new, old, n * sizeof(struct ftrace_event_file *)); + new[n] = file; + /* The last one keeps a NULL */ + + rcu_assign_pointer(tp->files, new); + tp->flags |= TP_FLAG_TRACE; + + if (old) { + /* Make sure the probe is done with old files */ + synchronize_sched(); + kfree(old); + } + } else + tp->flags |= TP_FLAG_PROFILE; + if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) && !trace_probe_has_gone(tp)) { if (trace_probe_is_return(tp)) @@ -197,19 +242,83 @@ static int enable_trace_probe(struct trace_probe *tp, int flag) ret = enable_kprobe(&tp->rp.kp); } + out_unlock: + mutex_unlock(&probe_enable_lock); + return ret; } -/* Disable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */ -static void disable_trace_probe(struct trace_probe *tp, int flag) +static int +trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file) { - tp->flags &= ~flag; + int i; + + if (tp->files) { + for (i = 0; tp->files[i]; i++) + if (tp->files[i] == file) + return i; + } + + return -1; +} + +/* + * Disable trace_probe + * if the file is NULL, disable "perf" handler, or disable "trace" handler. + */ +static int +disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file) +{ + int ret = 0; + + mutex_lock(&probe_enable_lock); + + if (file) { + struct ftrace_event_file **new, **old = tp->files; + int n = trace_probe_nr_files(tp); + int i, j; + + if (n == 0 || trace_probe_file_index(tp, file) < 0) { + ret = -EINVAL; + goto out_unlock; + } + + if (n == 1) { /* Remove the last file */ + tp->flags &= ~TP_FLAG_TRACE; + new = NULL; + } else { + new = kzalloc(n * sizeof(struct ftrace_event_file *), + GFP_KERNEL); + if (!new) { + ret = -ENOMEM; + goto out_unlock; + } + + /* This copy & check loop copies the NULL stopper too */ + for (i = 0, j = 0; j < n && i < n + 1; i++) + if (old[i] != file) + new[j++] = old[i]; + } + + rcu_assign_pointer(tp->files, new); + + /* Make sure the probe is done with old files */ + synchronize_sched(); + kfree(old); + } else + tp->flags &= ~TP_FLAG_PROFILE; + if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) { if (trace_probe_is_return(tp)) disable_kretprobe(&tp->rp); else disable_kprobe(&tp->rp.kp); } + + out_unlock: + mutex_unlock(&probe_enable_lock); + + return ret; } /* Internal register function - just handle k*probes and flags */ @@ -724,7 +833,8 @@ static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp, /* Kprobe handler */ static __kprobes void -kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs) +__kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs, + struct ftrace_event_file *ftrace_file) { struct kprobe_trace_entry_head *entry; struct ring_buffer_event *event; @@ -733,14 +843,17 @@ kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs) unsigned long irq_flags; struct ftrace_event_call *call = &tp->call; + WARN_ON(call != ftrace_file->event_call); + local_save_flags(irq_flags); pc = preempt_count(); dsize = __get_data_size(tp, regs); size = sizeof(*entry) + tp->size + dsize; - event = 
trace_current_buffer_lock_reserve(&buffer, call->event.type, - size, irq_flags, pc); + event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, + call->event.type, + size, irq_flags, pc); if (!event) return; @@ -753,10 +866,23 @@ kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs) irq_flags, pc, regs); } +static __kprobes void +kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs) +{ + struct ftrace_event_file **file = tp->files; + + /* Note: preempt is already disabled around the kprobe handler */ + while (*file) { + __kprobe_trace_func(tp, regs, *file); + file++; + } +} + /* Kretprobe handler */ static __kprobes void -kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, - struct pt_regs *regs) +__kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, + struct pt_regs *regs, + struct ftrace_event_file *ftrace_file) { struct kretprobe_trace_entry_head *entry; struct ring_buffer_event *event; @@ -765,14 +891,17 @@ kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, unsigned long irq_flags; struct ftrace_event_call *call = &tp->call; + WARN_ON(call != ftrace_file->event_call); + local_save_flags(irq_flags); pc = preempt_count(); dsize = __get_data_size(tp, regs); size = sizeof(*entry) + tp->size + dsize; - event = trace_current_buffer_lock_reserve(&buffer, call->event.type, - size, irq_flags, pc); + event = trace_event_buffer_lock_reserve(&buffer, ftrace_file, + call->event.type, + size, irq_flags, pc); if (!event) return; @@ -786,6 +915,19 @@ kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, irq_flags, pc, regs); } +static __kprobes void +kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + struct ftrace_event_file **file = tp->files; + + /* Note: preempt is already disabled around the kprobe handler */ + while (*file) { + __kretprobe_trace_func(tp, ri, regs, *file); + file++; + } +} + /* Event entry printers */ enum print_line_t print_kprobe_event(struct trace_iterator *iter, int flags, @@ -1041,20 +1183,19 @@ int kprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data) { struct trace_probe *tp = (struct trace_probe *)event->data; + struct ftrace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: - return enable_trace_probe(tp, TP_FLAG_TRACE); + return enable_trace_probe(tp, file); case TRACE_REG_UNREGISTER: - disable_trace_probe(tp, TP_FLAG_TRACE); - return 0; + return disable_trace_probe(tp, file); #ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: - return enable_trace_probe(tp, TP_FLAG_PROFILE); + return enable_trace_probe(tp, NULL); case TRACE_REG_PERF_UNREGISTER: - disable_trace_probe(tp, TP_FLAG_PROFILE); - return 0; + return disable_trace_probe(tp, NULL); case TRACE_REG_PERF_OPEN: case TRACE_REG_PERF_CLOSE: case TRACE_REG_PERF_ADD: @@ -1190,11 +1331,24 @@ static __used int kprobe_trace_selftest_target(int a1, int a2, int a3, return a1 + a2 + a3 + a4 + a5 + a6; } +static struct ftrace_event_file * +find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr) +{ + struct ftrace_event_file *file; + + list_for_each_entry(file, &tr->events, list) + if (file->event_call == &tp->call) + return file; + + return NULL; +} + static __init int kprobe_trace_self_tests_init(void) { int ret, warn = 0; int (*target)(int, int, int, int, int, int); struct trace_probe *tp; + struct ftrace_event_file *file; target = kprobe_trace_selftest_target; @@ -1204,31 +1358,43 @@ static 
__init int kprobe_trace_self_tests_init(void) "$stack $stack0 +0($stack)", create_trace_probe); if (WARN_ON_ONCE(ret)) { - pr_warning("error on probing function entry.\n"); + pr_warn("error on probing function entry.\n"); warn++; } else { /* Enable trace point */ tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM); if (WARN_ON_ONCE(tp == NULL)) { - pr_warning("error on getting new probe.\n"); + pr_warn("error on getting new probe.\n"); warn++; - } else - enable_trace_probe(tp, TP_FLAG_TRACE); + } else { + file = find_trace_probe_file(tp, top_trace_array()); + if (WARN_ON_ONCE(file == NULL)) { + pr_warn("error on getting probe file.\n"); + warn++; + } else + enable_trace_probe(tp, file); + } } ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target " "$retval", create_trace_probe); if (WARN_ON_ONCE(ret)) { - pr_warning("error on probing function return.\n"); + pr_warn("error on probing function return.\n"); warn++; } else { /* Enable trace point */ tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM); if (WARN_ON_ONCE(tp == NULL)) { - pr_warning("error on getting new probe.\n"); + pr_warn("error on getting 2nd new probe.\n"); warn++; - } else - enable_trace_probe(tp, TP_FLAG_TRACE); + } else { + file = find_trace_probe_file(tp, top_trace_array()); + if (WARN_ON_ONCE(file == NULL)) { + pr_warn("error on getting probe file.\n"); + warn++; + } else + enable_trace_probe(tp, file); + } } if (warn) @@ -1239,27 +1405,39 @@ static __init int kprobe_trace_self_tests_init(void) /* Disable trace points before removing it */ tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM); if (WARN_ON_ONCE(tp == NULL)) { - pr_warning("error on getting test probe.\n"); + pr_warn("error on getting test probe.\n"); warn++; - } else - disable_trace_probe(tp, TP_FLAG_TRACE); + } else { + file = find_trace_probe_file(tp, top_trace_array()); + if (WARN_ON_ONCE(file == NULL)) { + pr_warn("error on getting probe file.\n"); + warn++; + } else + disable_trace_probe(tp, file); + } tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM); if (WARN_ON_ONCE(tp == NULL)) { - pr_warning("error on getting 2nd test probe.\n"); + pr_warn("error on getting 2nd test probe.\n"); warn++; - } else - disable_trace_probe(tp, TP_FLAG_TRACE); + } else { + file = find_trace_probe_file(tp, top_trace_array()); + if (WARN_ON_ONCE(file == NULL)) { + pr_warn("error on getting probe file.\n"); + warn++; + } else + disable_trace_probe(tp, file); + } ret = traceprobe_command("-:testprobe", create_trace_probe); if (WARN_ON_ONCE(ret)) { - pr_warning("error on deleting a probe.\n"); + pr_warn("error on deleting a probe.\n"); warn++; } ret = traceprobe_command("-:testprobe2", create_trace_probe); if (WARN_ON_ONCE(ret)) { - pr_warning("error on deleting a probe.\n"); + pr_warn("error on deleting a probe.\n"); warn++; } From b8820084f2130b3dcfb09c78ac16cdd2194a345b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 9 May 2013 14:44:54 +0900 Subject: [PATCH 18/18] tracing/kprobes: Support soft-mode disabling Support soft-mode disabling on kprobe-based dynamic events. Soft-disabling is just ignoring recording if the soft disabled flag is set. 
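In handler terms this is one early test before any ring-buffer work, as in the hunks below:

        if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags))
                return;         /* soft-disabled: record nothing for this file */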
Link: http://lkml.kernel.org/r/20130509054454.30398.7237.stgit@mhiramat-M0-7522 Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Tom Zanussi Cc: Oleg Nesterov Cc: Srikar Dronamraju Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index fee865d8a7c4..636d45fe69b3 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -845,6 +845,9 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs, WARN_ON(call != ftrace_file->event_call); + if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags)) + return; + local_save_flags(irq_flags); pc = preempt_count(); @@ -893,6 +896,9 @@ __kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri, WARN_ON(call != ftrace_file->event_call); + if (test_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &ftrace_file->flags)) + return; + local_save_flags(irq_flags); pc = preempt_count();
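As a closing illustration, patches 17 and 18 together let a kprobe event be enabled per trace instance, with soft mode honored per ftrace_event_file. A hypothetical session (the probe and instance names are made up; paths assume debugfs is mounted at /sys/kernel/debug):

# cd /sys/kernel/debug/tracing
# echo 'p:myprobe vfs_read' > kprobe_events
# mkdir instances/foo
# echo 1 > instances/foo/events/kprobes/myprobe/enable
# cat instances/foo/trace

Only the "foo" instance records myprobe hits; the top-level buffer stays quiet until its own enable file is set.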