mirror of
https://github.com/FEX-Emu/linux.git
synced 2025-01-08 10:30:50 +00:00
f0915781bd
Inner-shareable TLB invalidation is typically more expensive than local (non-shareable) invalidation, so performing the broadcasting for local_flush_tlb_* operations is a waste of cycles and needlessly clobbers entries in the TLBs of other CPUs. This patch introduces __flush_tlb_* versions for many of the TLB invalidation functions, which only respect inner-shareable variants of the invalidation instructions when presented with the TLB_V7_UIS_FULL flag. The local version is also inlined to prevent SMP_ON_UP kernels from missing flushes, where the __flush variant would be called with the UP flags. This gains us around 0.5% in hackbench scores for a dual-core A15, but I would expect this to improve as more cores (and clusters) are added to the equation. Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> Reported-by: Albin Tonnerre <Albin.Tonnerre@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com>
178 lines
3.8 KiB
C
178 lines
3.8 KiB
C
/*
 *  linux/arch/arm/kernel/smp_tlb.c
 *
 *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
|
|
#include <linux/preempt.h>
|
|
#include <linux/smp.h>
|
|
|
|
#include <asm/smp_plat.h>
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/mmu_context.h>
|
|
|
|
/**********************************************************************/
|
|
|
|
/*
 * TLB operations
 */
|
|
/*
 * Argument bundle passed by address to the ipi_flush_tlb_* callbacks.
 * Each callback reads only the fields it needs; callers fill in only
 * those fields.
 */
struct tlb_args {
	struct vm_area_struct	*ta_vma;	/* VMA given to the user page/range flushes */
	unsigned long		ta_start;	/* page address, or start of a range */
	unsigned long		ta_end;		/* end of a range (range flushes only) */
};
|
|
|
|
/*
 * Cross-call callback handed to on_each_cpu() by flush_tlb_all().
 * Calls local_flush_tlb_all(); the @ignored argument is never read.
 */
static inline void ipi_flush_tlb_all(void *ignored)
{
	local_flush_tlb_all();
}
|
|
|
|
/*
 * Cross-call callback handed to on_each_cpu_mask() by flush_tlb_mm().
 * @arg is the struct mm_struct pointer supplied by the caller; it is
 * forwarded to local_flush_tlb_mm().
 */
static inline void ipi_flush_tlb_mm(void *arg)
{
	struct mm_struct *target_mm = arg;

	local_flush_tlb_mm(target_mm);
}
|
|
|
|
static inline void ipi_flush_tlb_page(void *arg)
|
|
{
|
|
struct tlb_args *ta = (struct tlb_args *)arg;
|
|
|
|
local_flush_tlb_page(ta->ta_vma, ta->ta_start);
|
|
}
|
|
|
|
static inline void ipi_flush_tlb_kernel_page(void *arg)
|
|
{
|
|
struct tlb_args *ta = (struct tlb_args *)arg;
|
|
|
|
local_flush_tlb_kernel_page(ta->ta_start);
|
|
}
|
|
|
|
static inline void ipi_flush_tlb_range(void *arg)
|
|
{
|
|
struct tlb_args *ta = (struct tlb_args *)arg;
|
|
|
|
local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
|
|
}
|
|
|
|
static inline void ipi_flush_tlb_kernel_range(void *arg)
|
|
{
|
|
struct tlb_args *ta = (struct tlb_args *)arg;
|
|
|
|
local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end);
|
|
}
|
|
|
|
/*
 * Cross-call callback handed to on_each_cpu() by flush_bp_all().
 * Calls local_flush_bp_all(); the @ignored argument is never read.
 */
static inline void ipi_flush_bp_all(void *ignored)
{
	local_flush_bp_all();
}
|
|
|
|
/*
 * Cross-call callback used by the two broadcast_tlb_*_a15_erratum()
 * helpers.  The only work done on the receiving CPU is a dmb(); @arg is
 * never read.
 */
static void ipi_flush_tlb_a15_erratum(void *arg)
{
	dmb();
}
|
|
|
|
static void broadcast_tlb_a15_erratum(void)
|
|
{
|
|
if (!erratum_a15_798181())
|
|
return;
|
|
|
|
dummy_flush_tlb_a15_erratum();
|
|
smp_call_function(ipi_flush_tlb_a15_erratum, NULL, 1);
|
|
}
|
|
|
|
static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm)
|
|
{
|
|
int this_cpu;
|
|
cpumask_t mask = { CPU_BITS_NONE };
|
|
|
|
if (!erratum_a15_798181())
|
|
return;
|
|
|
|
dummy_flush_tlb_a15_erratum();
|
|
this_cpu = get_cpu();
|
|
a15_erratum_get_cpumask(this_cpu, mm, &mask);
|
|
smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1);
|
|
put_cpu();
|
|
}
|
|
|
|
void flush_tlb_all(void)
|
|
{
|
|
if (tlb_ops_need_broadcast())
|
|
on_each_cpu(ipi_flush_tlb_all, NULL, 1);
|
|
else
|
|
__flush_tlb_all();
|
|
broadcast_tlb_a15_erratum();
|
|
}
|
|
|
|
void flush_tlb_mm(struct mm_struct *mm)
|
|
{
|
|
if (tlb_ops_need_broadcast())
|
|
on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1);
|
|
else
|
|
__flush_tlb_mm(mm);
|
|
broadcast_tlb_mm_a15_erratum(mm);
|
|
}
|
|
|
|
void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
|
|
{
|
|
if (tlb_ops_need_broadcast()) {
|
|
struct tlb_args ta;
|
|
ta.ta_vma = vma;
|
|
ta.ta_start = uaddr;
|
|
on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page,
|
|
&ta, 1);
|
|
} else
|
|
__flush_tlb_page(vma, uaddr);
|
|
broadcast_tlb_mm_a15_erratum(vma->vm_mm);
|
|
}
|
|
|
|
void flush_tlb_kernel_page(unsigned long kaddr)
|
|
{
|
|
if (tlb_ops_need_broadcast()) {
|
|
struct tlb_args ta;
|
|
ta.ta_start = kaddr;
|
|
on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1);
|
|
} else
|
|
__flush_tlb_kernel_page(kaddr);
|
|
broadcast_tlb_a15_erratum();
|
|
}
|
|
|
|
void flush_tlb_range(struct vm_area_struct *vma,
|
|
unsigned long start, unsigned long end)
|
|
{
|
|
if (tlb_ops_need_broadcast()) {
|
|
struct tlb_args ta;
|
|
ta.ta_vma = vma;
|
|
ta.ta_start = start;
|
|
ta.ta_end = end;
|
|
on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range,
|
|
&ta, 1);
|
|
} else
|
|
local_flush_tlb_range(vma, start, end);
|
|
broadcast_tlb_mm_a15_erratum(vma->vm_mm);
|
|
}
|
|
|
|
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
|
|
{
|
|
if (tlb_ops_need_broadcast()) {
|
|
struct tlb_args ta;
|
|
ta.ta_start = start;
|
|
ta.ta_end = end;
|
|
on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1);
|
|
} else
|
|
local_flush_tlb_kernel_range(start, end);
|
|
broadcast_tlb_a15_erratum();
|
|
}
|
|
|
|
void flush_bp_all(void)
|
|
{
|
|
if (tlb_ops_need_broadcast())
|
|
on_each_cpu(ipi_flush_bp_all, NULL, 1);
|
|
else
|
|
local_flush_bp_all();
|
|
}
|