linux/arch/arm/kernel/smp_tlb.c
Will Deacon f0915781bd ARM: tlb: don't perform inner-shareable invalidation for local TLB ops
Inner-shareable TLB invalidation is typically more expensive than local
(non-shareable) invalidation, so performing the broadcasting for
local_flush_tlb_* operations is a waste of cycles and needlessly
clobbers entries in the TLBs of other CPUs.

This patch introduces __flush_tlb_* versions for many of the TLB
invalidation functions, which only respect inner-shareable variants of
the invalidation instructions when presented with the TLB_V7_UIS_FULL
flag. The local version is also inlined to prevent SMP_ON_UP kernels
from missing flushes, where the __flush variant would be called with
the UP flags.

This gains us around 0.5% in hackbench scores for a dual-core A15, but I
would expect this to improve as more cores (and clusters) are added to
the equation.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Albin Tonnerre <Albin.Tonnerre@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
2013-08-12 12:25:44 +01:00

178 lines
3.8 KiB
C

/*
* linux/arch/arm/kernel/smp_tlb.c
*
* Copyright (C) 2002 ARM Limited, All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/preempt.h>
#include <linux/smp.h>
#include <asm/smp_plat.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
/**********************************************************************/
/*
* TLB operations
*/
struct tlb_args {
struct vm_area_struct *ta_vma;
unsigned long ta_start;
unsigned long ta_end;
};
static inline void ipi_flush_tlb_all(void *ignored)
{
local_flush_tlb_all();
}
static inline void ipi_flush_tlb_mm(void *arg)
{
struct mm_struct *mm = (struct mm_struct *)arg;
local_flush_tlb_mm(mm);
}
static inline void ipi_flush_tlb_page(void *arg)
{
struct tlb_args *ta = (struct tlb_args *)arg;
local_flush_tlb_page(ta->ta_vma, ta->ta_start);
}
static inline void ipi_flush_tlb_kernel_page(void *arg)
{
struct tlb_args *ta = (struct tlb_args *)arg;
local_flush_tlb_kernel_page(ta->ta_start);
}
static inline void ipi_flush_tlb_range(void *arg)
{
struct tlb_args *ta = (struct tlb_args *)arg;
local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
}
static inline void ipi_flush_tlb_kernel_range(void *arg)
{
struct tlb_args *ta = (struct tlb_args *)arg;
local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
}
static inline void ipi_flush_bp_all(void *ignored)
{
local_flush_bp_all();
}
static void ipi_flush_tlb_a15_erratum(void *arg)
{
dmb();
}
static void broadcast_tlb_a15_erratum(void)
{
if (!erratum_a15_798181())
return;
dummy_flush_tlb_a15_erratum();
smp_call_function(ipi_flush_tlb_a15_erratum, NULL, 1);
}
static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm)
{
int this_cpu;
cpumask_t mask = { CPU_BITS_NONE };
if (!erratum_a15_798181())
return;
dummy_flush_tlb_a15_erratum();
this_cpu = get_cpu();
a15_erratum_get_cpumask(this_cpu, mm, &mask);
smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1);
put_cpu();
}
void flush_tlb_all(void)
{
if (tlb_ops_need_broadcast())
on_each_cpu(ipi_flush_tlb_all, NULL, 1);
else
__flush_tlb_all();
broadcast_tlb_a15_erratum();
}
void flush_tlb_mm(struct mm_struct *mm)
{
if (tlb_ops_need_broadcast())
on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1);
else
__flush_tlb_mm(mm);
broadcast_tlb_mm_a15_erratum(mm);
}
void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
{
if (tlb_ops_need_broadcast()) {
struct tlb_args ta;
ta.ta_vma = vma;
ta.ta_start = uaddr;
on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page,
&ta, 1);
} else
__flush_tlb_page(vma, uaddr);
broadcast_tlb_mm_a15_erratum(vma->vm_mm);
}
void flush_tlb_kernel_page(unsigned long kaddr)
{
if (tlb_ops_need_broadcast()) {
struct tlb_args ta;
ta.ta_start = kaddr;
on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1);
} else
__flush_tlb_kernel_page(kaddr);
broadcast_tlb_a15_erratum();
}
void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
if (tlb_ops_need_broadcast()) {
struct tlb_args ta;
ta.ta_vma = vma;
ta.ta_start = start;
ta.ta_end = end;
on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range,
&ta, 1);
} else
local_flush_tlb_range(vma, start, end);
broadcast_tlb_mm_a15_erratum(vma->vm_mm);
}
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
if (tlb_ops_need_broadcast()) {
struct tlb_args ta;
ta.ta_start = start;
ta.ta_end = end;
on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
} else
local_flush_tlb_kernel_range(start, end);
broadcast_tlb_a15_erratum();
}
void flush_bp_all(void)
{
if (tlb_ops_need_broadcast())
on_each_cpu(ipi_flush_bp_all, NULL, 1);
else
local_flush_bp_all();
}