diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h index fb5e4c658f7a..ef470a7a3d0f 100644 --- a/arch/unicore32/include/asm/mmu_context.h +++ b/arch/unicore32/include/asm/mmu_context.h @@ -14,6 +14,8 @@ #include #include +#include +#include #include #include @@ -73,7 +75,7 @@ do { \ else \ mm->mmap = NULL; \ rb_erase(&high_vma->vm_rb, &mm->mm_rb); \ - mm->mmap_cache = NULL; \ + vmacache_invalidate(mm); \ mm->map_count--; \ remove_vma(high_vma); \ } \ diff --git a/fs/exec.c b/fs/exec.c index 3e5b3eb98626..1a818cb67d70 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -835,7 +836,7 @@ EXPORT_SYMBOL(read_code); static int exec_mmap(struct mm_struct *mm) { struct task_struct *tsk; - struct mm_struct * old_mm, *active_mm; + struct mm_struct *old_mm, *active_mm; /* Notify parent that we're no longer interested in the old VM */ tsk = current; @@ -861,6 +862,8 @@ static int exec_mmap(struct mm_struct *mm) tsk->mm = mm; tsk->active_mm = mm; activate_mm(active_mm, mm); + tsk->mm->vmacache_seqnum = 0; + vmacache_flush(tsk); task_unlock(tsk); arch_pick_mmap_layout(mm); if (old_mm) { diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ced4a0b58326..0c5e698991f8 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -202,7 +203,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) /* * We remember last_addr rather than next_addr to hit with - * mmap_cache most of the time. We have zero last_addr at + * vmacache most of the time. We have zero last_addr at * the beginning and also after lseek. We will have -1 last_addr * after the end of the vmas. */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 86e2c54468dc..6db0d013fb09 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -338,9 +338,9 @@ struct mm_rss_stat { }; struct mm_struct { - struct vm_area_struct * mmap; /* list of VMAs */ + struct vm_area_struct *mmap; /* list of VMAs */ struct rb_root mm_rb; - struct vm_area_struct * mmap_cache; /* last find_vma result */ + u32 vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, diff --git a/include/linux/sched.h b/include/linux/sched.h index fc31780ed948..c2589ab0ec92 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -134,6 +134,10 @@ struct fs_struct; struct perf_event_context; struct blk_plug; +#define VMACACHE_BITS 2 +#define VMACACHE_SIZE (1U << VMACACHE_BITS) +#define VMACACHE_MASK (VMACACHE_SIZE - 1) + /* * List of flags we want to share for kernel threads, * if only because they are not used by them anyway. @@ -1293,6 +1297,9 @@ struct task_struct { #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif + /* per-thread vma caching */ + u32 vmacache_seqnum; + struct vm_area_struct *vmacache[VMACACHE_SIZE]; #if defined(SPLIT_RSS_COUNTING) struct task_rss_stat rss_stat; #endif diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h new file mode 100644 index 000000000000..c3fa0fd43949 --- /dev/null +++ b/include/linux/vmacache.h @@ -0,0 +1,38 @@ +#ifndef __LINUX_VMACACHE_H +#define __LINUX_VMACACHE_H + +#include +#include + +/* + * Hash based on the page number. Provides a good hit rate for + * workloads with good locality and those with random accesses as well. + */ +#define VMACACHE_HASH(addr) ((addr >> PAGE_SHIFT) & VMACACHE_MASK) + +static inline void vmacache_flush(struct task_struct *tsk) +{ + memset(tsk->vmacache, 0, sizeof(tsk->vmacache)); +} + +extern void vmacache_flush_all(struct mm_struct *mm); +extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma); +extern struct vm_area_struct *vmacache_find(struct mm_struct *mm, + unsigned long addr); + +#ifndef CONFIG_MMU +extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, + unsigned long start, + unsigned long end); +#endif + +static inline void vmacache_invalidate(struct mm_struct *mm) +{ + mm->vmacache_seqnum++; + + /* deal with overflows */ + if (unlikely(mm->vmacache_seqnum == 0)) + vmacache_flush_all(mm); +} + +#endif /* __LINUX_VMACACHE_H */ diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index d9e098df2ef3..ab7d472cc869 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -230,10 +231,17 @@ static void kgdb_flush_swbreak_addr(unsigned long addr) if (!CACHE_FLUSH_IS_SAFE) return; - if (current->mm && current->mm->mmap_cache) { - flush_cache_range(current->mm->mmap_cache, - addr, addr + BREAK_INSTR_SIZE); + if (current->mm) { + int i; + + for (i = 0; i < VMACACHE_SIZE; i++) { + if (!current->vmacache[i]) + continue; + flush_cache_range(current->vmacache[i], + addr, addr + BREAK_INSTR_SIZE); + } } + /* Force flush instruction cache if it was outside the mm */ flush_icache_range(addr, addr + BREAK_INSTR_SIZE); } diff --git a/kernel/fork.c b/kernel/fork.c index 56b3c1a281db..c2c02c6c5421 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include #include @@ -391,7 +393,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) mm->locked_vm = 0; mm->mmap = NULL; - mm->mmap_cache = NULL; + mm->vmacache_seqnum = 0; mm->map_count = 0; cpumask_clear(mm_cpumask(mm)); mm->mm_rb = RB_ROOT; @@ -913,6 +915,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) if (!oldmm) return 0; + /* initialize the new vmacache entries */ + vmacache_flush(tsk); + if (clone_flags & CLONE_VM) { atomic_inc(&oldmm->mm_users); mm = oldmm; diff --git a/mm/Makefile b/mm/Makefile index 85b2a6eb781d..2b844e0895cb 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -19,7 +19,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ util.o mmzone.o vmstat.o backing-dev.o \ mm_init.o mmu_context.o percpu.o slab_common.o \ - compaction.o balloon_compaction.o \ + compaction.o balloon_compaction.o vmacache.o \ interval_tree.o $(mmu-y) \ showmem.o vmpressure.o diff --git a/mm/mmap.c b/mm/mmap.c index fad64ca52881..2fb7fafb47ca 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -708,8 +709,9 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma, prev->vm_next = next = vma->vm_next; if (next) next->vm_prev = prev; - if (mm->mmap_cache == vma) - mm->mmap_cache = prev; + + /* Kill the cache */ + vmacache_invalidate(mm); } /* @@ -2062,34 +2064,33 @@ EXPORT_SYMBOL(get_unmapped_area); /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) { - struct vm_area_struct *vma = NULL; + struct rb_node *rb_node; + struct vm_area_struct *vma; /* Check the cache first. */ - /* (Cache hit rate is typically around 35%.) */ - vma = ACCESS_ONCE(mm->mmap_cache); - if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) { - struct rb_node *rb_node; + vma = vmacache_find(mm, addr); + if (likely(vma)) + return vma; - rb_node = mm->mm_rb.rb_node; - vma = NULL; + rb_node = mm->mm_rb.rb_node; + vma = NULL; - while (rb_node) { - struct vm_area_struct *vma_tmp; + while (rb_node) { + struct vm_area_struct *tmp; - vma_tmp = rb_entry(rb_node, - struct vm_area_struct, vm_rb); + tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb); - if (vma_tmp->vm_end > addr) { - vma = vma_tmp; - if (vma_tmp->vm_start <= addr) - break; - rb_node = rb_node->rb_left; - } else - rb_node = rb_node->rb_right; - } - if (vma) - mm->mmap_cache = vma; + if (tmp->vm_end > addr) { + vma = tmp; + if (tmp->vm_start <= addr) + break; + rb_node = rb_node->rb_left; + } else + rb_node = rb_node->rb_right; } + + if (vma) + vmacache_update(addr, vma); return vma; } @@ -2473,7 +2474,9 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, } else mm->highest_vm_end = prev ? vm_end_gap(prev) : 0; tail_vma->vm_next = NULL; - mm->mmap_cache = NULL; /* Kill the cache. */ + + /* Kill the cache */ + vmacache_invalidate(mm); } /* diff --git a/mm/nommu.c b/mm/nommu.c index 08118e026afb..48ad63aa66d4 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -765,16 +766,23 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) */ static void delete_vma_from_mm(struct vm_area_struct *vma) { + int i; struct address_space *mapping; struct mm_struct *mm = vma->vm_mm; + struct task_struct *curr = current; kenter("%p", vma); protect_vma(vma, 0); mm->map_count--; - if (mm->mmap_cache == vma) - mm->mmap_cache = NULL; + for (i = 0; i < VMACACHE_SIZE; i++) { + /* if the vma is cached, invalidate the entire cache */ + if (curr->vmacache[i] == vma) { + vmacache_invalidate(curr->mm); + break; + } + } /* remove the VMA from the mapping */ if (vma->vm_file) { @@ -822,8 +830,8 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) struct vm_area_struct *vma; /* check the cache first */ - vma = ACCESS_ONCE(mm->mmap_cache); - if (vma && vma->vm_start <= addr && vma->vm_end > addr) + vma = vmacache_find(mm, addr); + if (likely(vma)) return vma; /* trawl the list (there may be multiple mappings in which addr @@ -832,7 +840,7 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) if (vma->vm_start > addr) return NULL; if (vma->vm_end > addr) { - mm->mmap_cache = vma; + vmacache_update(addr, vma); return vma; } } @@ -871,8 +879,8 @@ static struct vm_area_struct *find_vma_exact(struct mm_struct *mm, unsigned long end = addr + len; /* check the cache first */ - vma = mm->mmap_cache; - if (vma && vma->vm_start == addr && vma->vm_end == end) + vma = vmacache_find_exact(mm, addr, end); + if (vma) return vma; /* trawl the list (there may be multiple mappings in which addr @@ -883,7 +891,7 @@ static struct vm_area_struct *find_vma_exact(struct mm_struct *mm, if (vma->vm_start > addr) return NULL; if (vma->vm_end == end) { - mm->mmap_cache = vma; + vmacache_update(addr, vma); return vma; } } diff --git a/mm/vmacache.c b/mm/vmacache.c new file mode 100644 index 000000000000..d4224b397c0e --- /dev/null +++ b/mm/vmacache.c @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2014 Davidlohr Bueso. + */ +#include +#include +#include + +/* + * Flush vma caches for threads that share a given mm. + * + * The operation is safe because the caller holds the mmap_sem + * exclusively and other threads accessing the vma cache will + * have mmap_sem held at least for read, so no extra locking + * is required to maintain the vma cache. + */ +void vmacache_flush_all(struct mm_struct *mm) +{ + struct task_struct *g, *p; + + rcu_read_lock(); + for_each_process_thread(g, p) { + /* + * Only flush the vmacache pointers as the + * mm seqnum is already set and curr's will + * be set upon invalidation when the next + * lookup is done. + */ + if (mm == p->mm) + vmacache_flush(p); + } + rcu_read_unlock(); +} + +/* + * This task may be accessing a foreign mm via (for example) + * get_user_pages()->find_vma(). The vmacache is task-local and this + * task's vmacache pertains to a different mm (ie, its own). There is + * nothing we can do here. + * + * Also handle the case where a kernel thread has adopted this mm via use_mm(). + * That kernel thread's vmacache is not applicable to this mm. + */ +static bool vmacache_valid_mm(struct mm_struct *mm) +{ + return current->mm == mm && !(current->flags & PF_KTHREAD); +} + +void vmacache_update(unsigned long addr, struct vm_area_struct *newvma) +{ + if (vmacache_valid_mm(newvma->vm_mm)) + current->vmacache[VMACACHE_HASH(addr)] = newvma; +} + +static bool vmacache_valid(struct mm_struct *mm) +{ + struct task_struct *curr; + + if (!vmacache_valid_mm(mm)) + return false; + + curr = current; + if (mm->vmacache_seqnum != curr->vmacache_seqnum) { + /* + * First attempt will always be invalid, initialize + * the new cache for this task here. + */ + curr->vmacache_seqnum = mm->vmacache_seqnum; + vmacache_flush(curr); + return false; + } + return true; +} + +struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr) +{ + int i; + + if (!vmacache_valid(mm)) + return NULL; + + for (i = 0; i < VMACACHE_SIZE; i++) { + struct vm_area_struct *vma = current->vmacache[i]; + + if (vma && vma->vm_start <= addr && vma->vm_end > addr) { + BUG_ON(vma->vm_mm != mm); + return vma; + } + } + + return NULL; +} + +#ifndef CONFIG_MMU +struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + int i; + + if (!vmacache_valid(mm)) + return NULL; + + for (i = 0; i < VMACACHE_SIZE; i++) { + struct vm_area_struct *vma = current->vmacache[i]; + + if (vma && vma->vm_start == start && vma->vm_end == end) + return vma; + } + + return NULL; +} +#endif