From f8fd8c2eec345c4a0142b14340cb23eebd32b71e Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Wed, 11 Feb 2015 15:27:31 -0800
Subject: [PATCH] proc/pagemap: walk page tables under pte lock

Lockless access to pte in pagemap_pte_range() might race with page
migration and trigger BUG_ON(!PageLocked()) in migration_entry_to_page():

CPU A (pagemap)                         CPU B (migration)

                                        lock_page()
                                        try_to_unmap(page, TTU_MIGRATION...)
                                             make_migration_entry()
                                             set_pte_at()
pte_to_pagemap_entry()
                                        remove_migration_ptes()
                                        unlock_page()
if(is_migration_entry())
     migration_entry_to_page()
          BUG_ON(!PageLocked(page))

Also lockless read might be non-atomic if pte is larger than wordsize.
Other pte walkers (smaps, numa_maps, clear_refs) already lock ptes.

Change-Id: Ie9a6e67d3d5aa707a5eaaf77ceae3c08710f9da8
Fixes: 052fb0d635df ("proc: report file/anon bit in /proc/pid/pagemap")
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Reported-by: Andrey Ryabinin <a.ryabinin@samsung.com>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: <stable@vger.kernel.org>	[3.5+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Git-commit: 05fbf357d94152171bc50f8a369390f1f16efd89
Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
[lmark@codeaurora.org: fix merge conflicts]
Signed-off-by: Liam Mark <lmark@codeaurora.org>
---
 fs/proc/task_mmu.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index c70bb56e9ebd..ab33e0ecde2b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -959,7 +959,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 {
 	struct vm_area_struct *vma;
 	struct pagemapread *pm = walk->private;
-	pte_t *pte;
+	spinlock_t *ptl;
+	pte_t *pte, *orig_pte;
 	int err = 0;
 	pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
 
@@ -982,7 +983,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	if (pmd_trans_unstable(pmd))
 		return 0;
 
-	for (; addr != end; addr += PAGE_SIZE) {
+	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+	for (; addr != end; pte++, addr += PAGE_SIZE) {
 
 		/* check to see if we've left 'vma' behind
 		 * and need a new, higher one */
@@ -995,15 +997,13 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 		/* check that 'vma' actually covers this address,
 		 * and that it isn't a huge page vma */
 		if (vma && (vma->vm_start <= addr) &&
 		    !is_vm_hugetlb_page(vma)) {
-			pte = pte_offset_map(pmd, addr);
 			pte_to_pagemap_entry(&pme, vma, addr, *pte);
-			/* unmap before userspace copy */
-			pte_unmap(pte);
 		}
 		err = add_to_pagemap(addr, &pme, pm);
 		if (err)
-			return err;
+			break;
 	}
+	pte_unmap_unlock(orig_pte, ptl);
 
 	cond_resched();
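
For reference, the pattern the patch converts pagemap to is the same
map-and-lock walk already used by the other pte walkers named in the
commit message (smaps, numa_maps, clear_refs): take the pte page's
spinlock once per pmd with pte_offset_map_lock(), iterate the entries,
then drop it with pte_unmap_unlock(). A minimal sketch of that shape,
outside the pagemap code itself — handle_one_pte() is a placeholder for
whatever per-entry work a walker does, not a kernel API:

#include <linux/mm.h>

/*
 * Illustrative only: walk one pmd's worth of ptes under the pte lock,
 * mirroring the shape pagemap_pte_range() has after this patch.
 */
static int walk_ptes_locked(struct mm_struct *mm, pmd_t *pmd,
			    unsigned long addr, unsigned long end)
{
	spinlock_t *ptl;
	pte_t *pte, *orig_pte;
	int err = 0;

	/* Map the pte page and take its lock once for the whole range. */
	orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	for (; addr != end; pte++, addr += PAGE_SIZE) {
		/*
		 * *pte is stable here: migration entries cannot be
		 * installed or removed under us, and the read is atomic
		 * even where pte_t is wider than a machine word.
		 */
		err = handle_one_pte(*pte, addr);	/* placeholder */
		if (err)
			break;	/* never return with ptl held */
	}
	pte_unmap_unlock(orig_pte, ptl);
	return err;
}

Note the error handling: inside the locked loop the walker must break
rather than return — exactly the return-to-break change in the last
hunk — so that pte_unmap_unlock() always runs.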