proc/pagemap: walk page tables under pte lock

Lockless access to pte in pagemap_pte_range() might race with page
migration and trigger BUG_ON(!PageLocked()) in migration_entry_to_page():

CPU A (pagemap)                           CPU B (migration)
                                          lock_page()
                                          try_to_unmap(page, TTU_MIGRATION...)
                                               make_migration_entry()
                                               set_pte_at()
<read *pte>
pte_to_pagemap_entry()
                                          remove_migration_ptes()
                                          unlock_page()
    if(is_migration_entry())
        migration_entry_to_page()
            BUG_ON(!PageLocked(page))

Also lockless read might be non-atomic if pte is larger than wordsize.
Other pte walkers (smaps, numa_maps, clear_refs) already lock ptes.

Change-Id: Ie9a6e67d3d5aa707a5eaaf77ceae3c08710f9da8
Fixes: 052fb0d635 ("proc: report file/anon bit in /proc/pid/pagemap")
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Reported-by: Andrey Ryabinin <a.ryabinin@samsung.com>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: <stable@vger.kernel.org>	[3.5+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Git-commit: 05fbf357d94152171bc50f8a369390f1f16efd89
Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
[lmark@codeaurora.org: fix merge conflicts]
Signed-off-by: Liam Mark <lmark@codeaurora.org>
This commit is contained in:
Konstantin Khlebnikov 2015-02-11 15:27:31 -08:00 committed by Liam Mark
parent 03a01a29d7
commit f8fd8c2eec

View file

@ -959,7 +959,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
{
struct vm_area_struct *vma;
struct pagemapread *pm = walk->private;
pte_t *pte;
spinlock_t *ptl;
pte_t *pte, *orig_pte;
int err = 0;
pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
@ -982,7 +983,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (pmd_trans_unstable(pmd))
return 0;
for (; addr != end; addr += PAGE_SIZE) {
orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE) {
/* check to see if we've left 'vma' behind
* and need a new, higher one */
@ -995,15 +997,13 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
* and that it isn't a huge page vma */
if (vma && (vma->vm_start <= addr) &&
!is_vm_hugetlb_page(vma)) {
pte = pte_offset_map(pmd, addr);
pte_to_pagemap_entry(&pme, vma, addr, *pte);
/* unmap before userspace copy */
pte_unmap(pte);
}
err = add_to_pagemap(addr, &pme, pm);
if (err)
return err;
break;
}
pte_unmap_unlock(orig_pte, ptl);
cond_resched();