powerpc/8xx: Only perform perf counting when perf is in use.

In TLB miss handlers, updating the perf counter is only useful
when performing a perf analysis. As it has a noticeable overhead,
let's only do it when needed.

In order to do so, the exit of the miss handlers will be patched
when starting/stopping 'perf': the first register restore
instruction of each exit point will be replaced by a jump to
the counting code.

Once this is done, CONFIG_PPC_8xx_PERF_EVENT becomes useless, as
the feature no longer adds any overhead when perf is not in use.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
Christophe Leroy 2018-01-12 13:45:23 +01:00 committed by Michael Ellerman
parent bb9b5a8332
commit cd99ddbea2
6 changed files with 88 additions and 32 deletions

View File

@ -236,6 +236,7 @@
#define PPC_INST_RFCI 0x4c000066 #define PPC_INST_RFCI 0x4c000066
#define PPC_INST_RFDI 0x4c00004e #define PPC_INST_RFDI 0x4c00004e
#define PPC_INST_RFMCI 0x4c00004c #define PPC_INST_RFMCI 0x4c00004c
#define PPC_INST_MFSPR 0x7c0002a6
#define PPC_INST_MFSPR_DSCR 0x7c1102a6 #define PPC_INST_MFSPR_DSCR 0x7c1102a6
#define PPC_INST_MFSPR_DSCR_MASK 0xfc1ffffe #define PPC_INST_MFSPR_DSCR_MASK 0xfc1ffffe
#define PPC_INST_MTSPR_DSCR 0x7c1103a6 #define PPC_INST_MTSPR_DSCR 0x7c1103a6
@ -383,6 +384,7 @@
#define __PPC_ME64(s) __PPC_MB64(s) #define __PPC_ME64(s) __PPC_MB64(s)
#define __PPC_BI(s) (((s) & 0x1f) << 16) #define __PPC_BI(s) (((s) & 0x1f) << 16)
#define __PPC_CT(t) (((t) & 0x0f) << 21) #define __PPC_CT(t) (((t) & 0x0f) << 21)
#define __PPC_SPR(r) ((((r) & 0x1f) << 16) | ((((r) >> 5) & 0x1f) << 11))
/* /*
* Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a

View File

@ -211,7 +211,7 @@ transfer_to_handler_cont:
mflr r9 mflr r9
lwz r11,0(r9) /* virtual address of handler */ lwz r11,0(r9) /* virtual address of handler */
lwz r9,4(r9) /* where to go when done */ lwz r9,4(r9) /* where to go when done */
#ifdef CONFIG_PPC_8xx_PERF_EVENT #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0 mtspr SPRN_NRI, r0
#endif #endif
#ifdef CONFIG_TRACE_IRQFLAGS #ifdef CONFIG_TRACE_IRQFLAGS
@ -301,7 +301,7 @@ stack_ovf:
lis r9,StackOverflow@ha lis r9,StackOverflow@ha
addi r9,r9,StackOverflow@l addi r9,r9,StackOverflow@l
LOAD_MSR_KERNEL(r10,MSR_KERNEL) LOAD_MSR_KERNEL(r10,MSR_KERNEL)
#ifdef CONFIG_PPC_8xx_PERF_EVENT #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0 mtspr SPRN_NRI, r0
#endif #endif
mtspr SPRN_SRR0,r9 mtspr SPRN_SRR0,r9
@ -430,7 +430,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
lwz r7,_NIP(r1) lwz r7,_NIP(r1)
lwz r2,GPR2(r1) lwz r2,GPR2(r1)
lwz r1,GPR1(r1) lwz r1,GPR1(r1)
#ifdef CONFIG_PPC_8xx_PERF_EVENT #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0 mtspr SPRN_NRI, r0
#endif #endif
mtspr SPRN_SRR0,r7 mtspr SPRN_SRR0,r7
@ -727,7 +727,7 @@ fast_exception_return:
lwz r10,_LINK(r11) lwz r10,_LINK(r11)
mtlr r10 mtlr r10
REST_GPR(10, r11) REST_GPR(10, r11)
#ifdef CONFIG_PPC_8xx_PERF_EVENT #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0 mtspr SPRN_NRI, r0
#endif #endif
mtspr SPRN_SRR1,r9 mtspr SPRN_SRR1,r9
@ -978,7 +978,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
.globl exc_exit_restart .globl exc_exit_restart
exc_exit_restart: exc_exit_restart:
lwz r12,_NIP(r1) lwz r12,_NIP(r1)
#ifdef CONFIG_PPC_8xx_PERF_EVENT #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0 mtspr SPRN_NRI, r0
#endif #endif
mtspr SPRN_SRR0,r12 mtspr SPRN_SRR0,r12

View File

@ -304,12 +304,6 @@ InstructionTLBMiss:
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mtspr SPRN_SPRG_SCRATCH2, r12 mtspr SPRN_SPRG_SCRATCH2, r12
#endif #endif
#ifdef CONFIG_PPC_8xx_PERF_EVENT
lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha
lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, 1
stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
#endif
/* If we are faulting a kernel address, we have to use the /* If we are faulting a kernel address, we have to use the
* kernel page tables. * kernel page tables.
@ -392,6 +386,20 @@ _ENTRY(ITLBMiss_cmp)
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
/* Restore registers */ /* Restore registers */
_ENTRY(itlb_miss_exit_1)
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
mfspr r12, SPRN_SPRG_SCRATCH2
#endif
rfi
#ifdef CONFIG_PERF_EVENTS
_ENTRY(itlb_miss_perf)
lis r10, (itlb_miss_counter - PAGE_OFFSET)@ha
lwz r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, 1
stw r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
#endif
mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r11, SPRN_SPRG_SCRATCH1
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
@ -429,12 +437,6 @@ DataStoreTLBMiss:
mtspr SPRN_SPRG_SCRATCH0, r10 mtspr SPRN_SPRG_SCRATCH0, r10
mtspr SPRN_SPRG_SCRATCH1, r11 mtspr SPRN_SPRG_SCRATCH1, r11
mtspr SPRN_SPRG_SCRATCH2, r12 mtspr SPRN_SPRG_SCRATCH2, r12
#ifdef CONFIG_PPC_8xx_PERF_EVENT
lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, 1
stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
#endif
mfcr r12 mfcr r12
/* If we are faulting a kernel address, we have to use the /* If we are faulting a kernel address, we have to use the
@ -526,6 +528,18 @@ _ENTRY(DTLBMiss_jmp)
/* Restore registers */ /* Restore registers */
mtspr SPRN_DAR, r11 /* Tag DAR */ mtspr SPRN_DAR, r11 /* Tag DAR */
_ENTRY(dtlb_miss_exit_1)
mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2
rfi
#ifdef CONFIG_PERF_EVENTS
_ENTRY(dtlb_miss_perf)
lis r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
lwz r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
addi r11, r11, 1
stw r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
#endif
mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2 mfspr r12, SPRN_SPRG_SCRATCH2
@ -635,7 +649,7 @@ DataBreakpoint:
mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r11, SPRN_SPRG_SCRATCH1
rfi rfi
#ifdef CONFIG_PPC_8xx_PERF_EVENT #ifdef CONFIG_PERF_EVENTS
. = 0x1d00 . = 0x1d00
InstructionBreakpoint: InstructionBreakpoint:
mtspr SPRN_SPRG_SCRATCH0, r10 mtspr SPRN_SPRG_SCRATCH0, r10
@ -675,6 +689,7 @@ DTLBMissIMMR:
li r11, RPN_PATTERN li r11, RPN_PATTERN
mtspr SPRN_DAR, r11 /* Tag DAR */ mtspr SPRN_DAR, r11 /* Tag DAR */
_ENTRY(dtlb_miss_exit_2)
mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2 mfspr r12, SPRN_SPRG_SCRATCH2
@ -692,6 +707,7 @@ DTLBMissLinear:
li r11, RPN_PATTERN li r11, RPN_PATTERN
mtspr SPRN_DAR, r11 /* Tag DAR */ mtspr SPRN_DAR, r11 /* Tag DAR */
_ENTRY(dtlb_miss_exit_3)
mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2 mfspr r12, SPRN_SPRG_SCRATCH2
@ -708,6 +724,7 @@ ITLBMissLinear:
_PAGE_PRESENT _PAGE_PRESENT
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
_ENTRY(itlb_miss_exit_2)
mfspr r10, SPRN_SPRG_SCRATCH0 mfspr r10, SPRN_SPRG_SCRATCH0
mfspr r11, SPRN_SPRG_SCRATCH1 mfspr r11, SPRN_SPRG_SCRATCH1
mfspr r12, SPRN_SPRG_SCRATCH2 mfspr r12, SPRN_SPRG_SCRATCH2
@ -1039,7 +1056,7 @@ initial_mmu:
#endif #endif
/* Disable debug mode entry on breakpoints */ /* Disable debug mode entry on breakpoints */
mfspr r8, SPRN_DER mfspr r8, SPRN_DER
#ifdef CONFIG_PPC_8xx_PERF_EVENT #ifdef CONFIG_PERF_EVENTS
rlwinm r8, r8, 0, ~0xc rlwinm r8, r8, 0, ~0xc
#else #else
rlwinm r8, r8, 0, ~0x8 rlwinm r8, r8, 0, ~0x8
@ -1072,7 +1089,7 @@ swapper_pg_dir:
abatron_pteptrs: abatron_pteptrs:
.space 8 .space 8
#ifdef CONFIG_PPC_8xx_PERF_EVENT #ifdef CONFIG_PERF_EVENTS
.globl itlb_miss_counter .globl itlb_miss_counter
itlb_miss_counter: itlb_miss_counter:
.space 4 .space 4

View File

@ -18,6 +18,7 @@
#include <asm/machdep.h> #include <asm/machdep.h>
#include <asm/firmware.h> #include <asm/firmware.h>
#include <asm/ptrace.h> #include <asm/ptrace.h>
#include <asm/code-patching.h>
#define PERF_8xx_ID_CPU_CYCLES 1 #define PERF_8xx_ID_CPU_CYCLES 1
#define PERF_8xx_ID_HW_INSTRUCTIONS 2 #define PERF_8xx_ID_HW_INSTRUCTIONS 2
@ -30,8 +31,13 @@
extern unsigned long itlb_miss_counter, dtlb_miss_counter; extern unsigned long itlb_miss_counter, dtlb_miss_counter;
extern atomic_t instruction_counter; extern atomic_t instruction_counter;
extern unsigned int itlb_miss_perf, dtlb_miss_perf;
extern unsigned int itlb_miss_exit_1, itlb_miss_exit_2;
extern unsigned int dtlb_miss_exit_1, dtlb_miss_exit_2, dtlb_miss_exit_3;
static atomic_t insn_ctr_ref; static atomic_t insn_ctr_ref;
static atomic_t itlb_miss_ref;
static atomic_t dtlb_miss_ref;
static s64 get_insn_ctr(void) static s64 get_insn_ctr(void)
{ {
@ -96,9 +102,24 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags)
val = get_insn_ctr(); val = get_insn_ctr();
break; break;
case PERF_8xx_ID_ITLB_LOAD_MISS: case PERF_8xx_ID_ITLB_LOAD_MISS:
if (atomic_inc_return(&itlb_miss_ref) == 1) {
unsigned long target = (unsigned long)&itlb_miss_perf;
patch_branch(&itlb_miss_exit_1, target, 0);
#ifndef CONFIG_PIN_TLB_TEXT
patch_branch(&itlb_miss_exit_2, target, 0);
#endif
}
val = itlb_miss_counter; val = itlb_miss_counter;
break; break;
case PERF_8xx_ID_DTLB_LOAD_MISS: case PERF_8xx_ID_DTLB_LOAD_MISS:
if (atomic_inc_return(&dtlb_miss_ref) == 1) {
unsigned long target = (unsigned long)&dtlb_miss_perf;
patch_branch(&dtlb_miss_exit_1, target, 0);
patch_branch(&dtlb_miss_exit_2, target, 0);
patch_branch(&dtlb_miss_exit_3, target, 0);
}
val = dtlb_miss_counter; val = dtlb_miss_counter;
break; break;
} }
@ -143,13 +164,36 @@ static void mpc8xx_pmu_read(struct perf_event *event)
static void mpc8xx_pmu_del(struct perf_event *event, int flags) static void mpc8xx_pmu_del(struct perf_event *event, int flags)
{ {
/* mfspr r10, SPRN_SPRG_SCRATCH0 */
unsigned int insn = PPC_INST_MFSPR | __PPC_RS(R10) |
__PPC_SPR(SPRN_SPRG_SCRATCH0);
mpc8xx_pmu_read(event); mpc8xx_pmu_read(event);
if (event_type(event) != PERF_8xx_ID_HW_INSTRUCTIONS)
return;
/* If it was the last user, stop counting to avoid useless overhead */ /* If it was the last user, stop counting to avoid useless overhead */
if (atomic_dec_return(&insn_ctr_ref) == 0) switch (event_type(event)) {
mtspr(SPRN_ICTRL, 7); case PERF_8xx_ID_CPU_CYCLES:
break;
case PERF_8xx_ID_HW_INSTRUCTIONS:
if (atomic_dec_return(&insn_ctr_ref) == 0)
mtspr(SPRN_ICTRL, 7);
break;
case PERF_8xx_ID_ITLB_LOAD_MISS:
if (atomic_dec_return(&itlb_miss_ref) == 0) {
patch_instruction(&itlb_miss_exit_1, insn);
#ifndef CONFIG_PIN_TLB_TEXT
patch_instruction(&itlb_miss_exit_2, insn);
#endif
}
break;
case PERF_8xx_ID_DTLB_LOAD_MISS:
if (atomic_dec_return(&dtlb_miss_ref) == 0) {
patch_instruction(&dtlb_miss_exit_1, insn);
patch_instruction(&dtlb_miss_exit_2, insn);
patch_instruction(&dtlb_miss_exit_3, insn);
}
break;
}
} }
static struct pmu mpc8xx_pmu = { static struct pmu mpc8xx_pmu = {

View File

@ -15,7 +15,7 @@ obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o
obj-$(CONFIG_PPC_8xx_PERF_EVENT) += 8xx-pmu.o obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o
obj-$(CONFIG_PPC64) += $(obj64-y) obj-$(CONFIG_PPC64) += $(obj64-y)
obj-$(CONFIG_PPC32) += $(obj32-y) obj-$(CONFIG_PPC32) += $(obj32-y)

View File

@ -167,13 +167,6 @@ config PPC_FPU
bool bool
default y if PPC64 default y if PPC64
config PPC_8xx_PERF_EVENT
bool "PPC 8xx perf events"
depends on PPC_8xx && PERF_EVENTS
help
This is Performance Events support for PPC 8xx. The 8xx doesn't
have a PMU but some events are emulated using 8xx features.
config FSL_EMB_PERFMON config FSL_EMB_PERFMON
bool "Freescale Embedded Perfmon" bool "Freescale Embedded Perfmon"
depends on E500 || PPC_83xx depends on E500 || PPC_83xx