linux/mm/pgtable-generic.c
Vineet Gupta 6a6ac72fd6 mm,thp: khugepaged: call pte flush at the time of collapse
This showed up on ARC when running LMBench bw_mem tests as Overlapping
TLB Machine Check Exception triggered due to STLB entry (2M pages)
overlapping some NTLB entry (regular 8K page).

bw_mem 2m touches a large chunk of vaddr creating NTLB entries.  In the
interim khugepaged kicks in, collapsing the contiguous ptes into a
single pmd.  pmdp_collapse_flush()->flush_pmd_tlb_range() is called to
flush out NTLB entries for the ptes.  This for ARC (by design) can only
shootdown STLB entries (for pmd).  The stray NTLB entries cause the
overlap with the subsequent STLB entry for collapsed page.  So make
pmdp_collapse_flush() call pte flush interface not pmd flush.

Note that originally all thp flush call sites in generic code called
flush_tlb_range() leaving it to architecture to implement the flush for
pte and/or pmd.  Commit 12ebc1581a changed this by calling a new
opt-in API flush_pmd_tlb_range() which made the semantics more explicit
but failed to distinguish the pte vs pmd flush in generic code, which is
what this patch fixes.

Note that ARC can fixed w/o touching the generic pmdp_collapse_flush()
by defining a ARC version, but that defeats the purpose of generic
version, plus sementically this is the right thing to do.

Fixes STAR 9000961194: LMBench on AXS103 triggering duplicate TLB
exceptions with super pages

Fixes: 12ebc1581a ("mm,thp: introduce flush_pmd_tlb_range")
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>	[4.4]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-02-11 18:35:48 -08:00

205 lines
5.4 KiB
C

/*
* mm/pgtable-generic.c
*
* Generic pgtable methods declared in asm-generic/pgtable.h
*
* Copyright (C) 2010 Linus Torvalds
*/
#include <linux/pagemap.h>
#include <asm/tlb.h>
#include <asm-generic/pgtable.h>
/*
* If a p?d_bad entry is found while walking page tables, report
* the error, before resetting entry to p?d_none. Usually (but
* very seldom) called out from the p?d_none_or_clear_bad macros.
*/
void pgd_clear_bad(pgd_t *pgd)
{
pgd_ERROR(*pgd);
pgd_clear(pgd);
}
void pud_clear_bad(pud_t *pud)
{
pud_ERROR(*pud);
pud_clear(pud);
}
void pmd_clear_bad(pmd_t *pmd)
{
pmd_ERROR(*pmd);
pmd_clear(pmd);
}
#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
/*
* Only sets the access flags (dirty, accessed), as well as write
* permission. Furthermore, we know it always gets set to a "more
* permissive" setting, which allows most architectures to optimize
* this. We return whether the PTE actually changed, which in turn
* instructs the caller to do things like update__mmu_cache. This
* used to be done in the caller, but sparc needs minor faults to
* force that call on sun4c so we changed this macro slightly
*/
int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
pte_t entry, int dirty)
{
int changed = !pte_same(*ptep, entry);
if (changed) {
set_pte_at(vma->vm_mm, address, ptep, entry);
flush_tlb_fix_spurious_fault(vma, address);
}
return changed;
}
#endif
#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
int ptep_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
int young;
young = ptep_test_and_clear_young(vma, address, ptep);
if (young)
flush_tlb_page(vma, address);
return young;
}
#endif
#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long address,
pte_t *ptep)
{
struct mm_struct *mm = (vma)->vm_mm;
pte_t pte;
pte = ptep_get_and_clear(mm, address, ptep);
if (pte_accessible(mm, pte))
flush_tlb_page(vma, address);
return pte;
}
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
/*
* ARCHes with special requirements for evicting THP backing TLB entries can
* implement this. Otherwise also, it can help optimize normal TLB flush in
* THP regime. stock flush_tlb_range() typically has optimization to nuke the
* entire TLB TLB if flush span is greater than a threshhold, which will
* likely be true for a single huge page. Thus a single thp flush will
* invalidate the entire TLB which is not desitable.
* e.g. see arch/arc: flush_pmd_tlb_range
*/
#define flush_pmd_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
#endif
#ifndef __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
int pmdp_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp,
pmd_t entry, int dirty)
{
int changed = !pmd_same(*pmdp, entry);
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
if (changed) {
set_pmd_at(vma->vm_mm, address, pmdp, entry);
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}
return changed;
}
#endif
#ifndef __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
int pmdp_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
int young;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
young = pmdp_test_and_clear_young(vma, address, pmdp);
if (young)
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
return young;
}
#endif
#ifndef __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
pmd_t pmd;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
return pmd;
}
#endif
#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pgtable)
{
assert_spin_locked(pmd_lockptr(mm, pmdp));
/* FIFO */
if (!pmd_huge_pte(mm, pmdp))
INIT_LIST_HEAD(&pgtable->lru);
else
list_add(&pgtable->lru, &pmd_huge_pte(mm, pmdp)->lru);
pmd_huge_pte(mm, pmdp) = pgtable;
}
#endif
#ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
/* no "address" argument so destroys page coloring of some arch */
pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
pgtable_t pgtable;
assert_spin_locked(pmd_lockptr(mm, pmdp));
/* FIFO */
pgtable = pmd_huge_pte(mm, pmdp);
pmd_huge_pte(mm, pmdp) = list_first_entry_or_null(&pgtable->lru,
struct page, lru);
if (pmd_huge_pte(mm, pmdp))
list_del(&pgtable->lru);
return pgtable;
}
#endif
#ifndef __HAVE_ARCH_PMDP_INVALIDATE
void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
pmd_t entry = *pmdp;
set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(entry));
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}
#endif
#ifndef pmdp_collapse_flush
pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
/*
* pmd and hugepage pte format are same. So we could
* use the same function.
*/
pmd_t pmd;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
VM_BUG_ON(pmd_trans_huge(*pmdp));
pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
/* collapse entails shooting down ptes not pmd */
flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
return pmd;
}
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */