From a7a2c2347c93cc3fdb70e106a3145da68091ce5c Mon Sep 17 00:00:00 2001
From: Daniel Campello
Date: Mon, 20 Jul 2015 10:37:56 -0700
Subject: [PATCH] Page cache miss tracing using ftrace on mm/filemap

This patch includes two trace events on generic_perform_write and
do_generic_file_read to check on the address_space mapping for the pages
to be accessed by the request.

Each event records the file path, the position and length of the
request, and whether any page touched by the request is absent from the
mapping. For writes the lookup is skipped when the request is
page-aligned and covers whole pages.

Change-Id: Ib319b9b2c971b9e5c76645be6cfd995ef9465d77
Signed-off-by: Daniel Campello

Conflicts:
	include/linux/pagemap.h
---
 include/linux/pagemap.h        |  7 ++--
 include/trace/events/filemap.h | 66 ++++++++++++++++++++++++++++++++++
 mm/filemap.c                   | 51 ++++++++++++++++++++++++--
 3 files changed, 119 insertions(+), 5 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index f6194c584780..0ee8433bbb6c 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -254,6 +254,9 @@ typedef int filler_t(void *, struct page *);
 
 extern struct page * find_get_page(struct address_space *mapping,
 				pgoff_t index);
+pgoff_t page_cache_next_hole(struct address_space *mapping,
+			     pgoff_t index, unsigned long max_scan);
+
 extern struct page * find_lock_page(struct address_space *mapping,
 				pgoff_t index);
 extern struct page * find_or_create_page(struct address_space *mapping,
@@ -400,7 +403,7 @@ static inline int wait_on_page_locked_killable(struct page *page)
 	return 0;
 }
 
-/* 
+/*
  * Wait for a page to be unlocked.
  *
  * This must be called with the caller "holding" the page,
@@ -413,7 +416,7 @@ static inline void wait_on_page_locked(struct page *page)
 		wait_on_page_bit(page, PG_locked);
 }
 
-/* 
+/*
  * Wait for a page to complete writeback
  */
 static inline void wait_on_page_writeback(struct page *page)
diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h
index 0421f49a20f7..2489b79d1b91 100644
--- a/include/trace/events/filemap.h
+++ b/include/trace/events/filemap.h
@@ -52,6 +52,72 @@ DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_add_to_page_cache,
 	TP_ARGS(page)
 	);
 
+DECLARE_EVENT_CLASS(mm_filemap_find_page_cache_miss,
+
+	TP_PROTO(struct file *file, loff_t pos, size_t count, int read),
+
+	TP_ARGS(file, pos, count, read),
+
+	TP_STRUCT__entry(
+		__array(char, path, MAX_FILTER_STR_VAL)
+		__field(loff_t, pos)
+		__field(size_t, count)
+		__field(int, miss)
+	),
+
+	TP_fast_assign(
+		char *name = d_path(&file->f_path, __entry->path,
+				    MAX_FILTER_STR_VAL);
+
+		/*
+		 * d_path() returns a pointer into the supplied buffer, or an
+		 * ERR_PTR on failure.  Keep a NUL-terminated string at the
+		 * start of the array instead of recording a raw pointer, so
+		 * the record stays valid wherever it is copied.
+		 */
+		if (IS_ERR(name))
+			strlcpy(__entry->path, "(unknown)",
+				MAX_FILTER_STR_VAL);
+		else
+			memmove(__entry->path, name, strlen(name) + 1);
+		__entry->pos = pos;
+		__entry->count = count;
+		__entry->miss = 0;
+		/*
+		 * The lookup is skipped for page-aligned, whole-page writes
+		 * (read == 0) and for zero-length requests.
+		 */
+		if (count && ((pos & ~PAGE_CACHE_MASK) ||
+			      (count % PAGE_SIZE) || read)) {
+			pgoff_t first = pos >> PAGE_CACHE_SHIFT;
+			pgoff_t last = (pos + count - 1) >> PAGE_CACHE_SHIFT;
+			pgoff_t hole;
+
+			/* Scan every page index the request touches. */
+			rcu_read_lock();
+			hole = page_cache_next_hole(file->f_mapping, first,
+						    last - first + 1);
+			rcu_read_unlock();
+			__entry->miss = (hole >= first && hole <= last);
+		}
+	),
+
+	TP_printk("path_name %s pos %lld count %zu miss %s",
+		  __entry->path,
+		  __entry->pos, __entry->count,
+		  (__entry->miss ? "yes" : "no"))
+);
+
+DEFINE_EVENT(mm_filemap_find_page_cache_miss, mm_filemap_do_generic_file_read,
+	TP_PROTO(struct file *file, loff_t pos, size_t count, int read),
+	TP_ARGS(file, pos, count, read)
+	);
+
+DEFINE_EVENT(mm_filemap_find_page_cache_miss, mm_filemap_generic_perform_write,
+	TP_PROTO(struct file *file, loff_t pos, size_t count, int read),
+	TP_ARGS(file, pos, count, read)
+	);
+
 #endif /* _TRACE_FILEMAP_H */
 
 /* This part must be outside protection */
diff --git a/mm/filemap.c b/mm/filemap.c
index f572707b961f..34d5a6489b19 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -683,6 +683,47 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 	}
 }
 
+/**
+ * page_cache_next_hole - find the next hole (not-present entry)
+ * @mapping: mapping
+ * @index: index
+ * @max_scan: maximum range to search
+ *
+ * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the
+ * lowest indexed hole.
+ *
+ * Returns: the index of the hole if found, otherwise returns an index
+ * outside of the set specified (in which case 'return - index >=
+ * max_scan' will be true). In rare cases of index wrap-around, 0 will
+ * be returned.
+ *
+ * page_cache_next_hole may be called under rcu_read_lock. However,
+ * like radix_tree_gang_lookup, this will not atomically search a
+ * snapshot of the tree at a single point in time. For example, if a
+ * hole is created at index 5, then subsequently a hole is created at
+ * index 10, page_cache_next_hole covering both indexes may return 10
+ * if called under rcu_read_lock.
+ */
+pgoff_t page_cache_next_hole(struct address_space *mapping,
+			     pgoff_t index, unsigned long max_scan)
+{
+	unsigned long i;
+
+	for (i = 0; i < max_scan; i++) {
+		struct page *page;
+
+		page = radix_tree_lookup(&mapping->page_tree, index);
+		if (!page || radix_tree_exceptional_entry(page))
+			break;
+		index++;
+		if (index == 0)
+			break;
+	}
+
+	return index;
+}
+EXPORT_SYMBOL(page_cache_next_hole);
+
 /**
  * find_get_page - find and get a page reference
  * @mapping: the address_space to search
@@ -1109,6 +1150,8 @@ static void do_generic_file_read(struct file *filp, loff_t *ppos,
 	unsigned int prev_offset;
 	int error;
 
+	trace_mm_filemap_do_generic_file_read(filp, *ppos, desc->count, 1);
+
 	index = *ppos >> PAGE_CACHE_SHIFT;
 	prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
 	prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
@@ -1503,7 +1546,7 @@ EXPORT_SYMBOL(generic_file_aio_read);
 static int page_cache_read(struct file *file, pgoff_t offset)
 {
 	struct address_space *mapping = file->f_mapping;
-	struct page *page; 
+	struct page *page;
 	int ret;
 
 	do {
@@ -1520,7 +1563,7 @@ static int page_cache_read(struct file *file, pgoff_t offset)
 		page_cache_release(page);
 
 	} while (ret == AOP_TRUNCATED_PAGE);
-		
+
 	return ret;
 }
 
@@ -2315,6 +2358,8 @@ static ssize_t generic_perform_write(struct file *file,
 	ssize_t written = 0;
 	unsigned int flags = 0;
 
+	trace_mm_filemap_generic_perform_write(file, pos, iov_iter_count(i), 0);
+
 	/*
 	 * Copies from kernel address space cannot fail (NFSD is a big user).
 	 */
@@ -2413,7 +2458,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 		written += status;
 		*ppos = pos + status;
   	}
-	
+
 	return written ? written : status;
 }
 EXPORT_SYMBOL(generic_file_buffered_write);