From 8f4b326d663b92e1c2a53a6857bef42e91aea5a6 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Wed, 21 Dec 2016 14:44:46 -0800 Subject: [PATCH 01/74] arm64: Don't trace __switch_to if function graph tracer is enabled Function graph tracer shows negative time (wrap around) when tracing __switch_to if the nosleep-time trace option is enabled. Time compensation for nosleep-time is done by an ftrace probe on sched_switch. This doesn't work well for the following events (with letters representing timestamps): A - sched switch probe called for task T switch out B - __switch_to calltime is recorded C - sched_switch probe called for task T switch in D - __switch_to rettime is recorded If C - A > D - B, then we end up over compensating for the time spent in __switch_to giving rise to negative times in the trace output. On x86, __switch_to is not traced if function graph tracer is enabled. Do the same for arm64 as well. Cc: Todd Kjos Cc: Steven Rostedt Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Joel Fernandes Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index a3a2816ba73a..0b07d42819da 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -339,7 +339,7 @@ static void entry_task_switch(struct task_struct *next) /* * Thread switching. */ -struct task_struct *__switch_to(struct task_struct *prev, +__notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next) { struct task_struct *last; From 117f5727ae058de03e2407f5fe566e715d143596 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 9 Jan 2017 14:13:36 +0000 Subject: [PATCH 02/74] arm64: add missing printk newlines A few printk calls in arm64 omit a trailing newline, even though there is no subsequent KERN_CONT printk associated with them, and we actually want a newline. This can result in unrelated lines being appended, rather than appearing on a new line. Additionally, timestamp prefixes may appear in-line. This makes the logs harder to read than necessary. Avoid this by adding a trailing newline. These were found with a shortlist generated by: $ git grep 'pr\(intk\|_.*\)(.*)' -- arch/arm64 | grep -v pr_fmt | grep -v '\\n"' Signed-off-by: Mark Rutland CC: James Morse Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/armv8_deprecated.c | 2 +- arch/arm64/kernel/hibernate.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index ecf9298a12d4..86032a012388 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -636,7 +636,7 @@ static int __init armv8_deprecated_init(void) if(system_supports_mixed_endian_el0()) register_insn_emulation(&setend_ops); else - pr_info("setend instruction emulation is not supported on the system"); + pr_info("setend instruction emulation is not supported on this system\n"); } cpuhp_setup_state_nocalls(CPUHP_AP_ARM64_ISNDEP_STARTING, diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index fe301cbcb442..1b918aaed538 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -472,7 +472,7 @@ int swsusp_arch_resume(void) */ tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC); if (!tmp_pg_dir) { - pr_err("Failed to allocate memory for temporary page tables."); + pr_err("Failed to allocate memory for temporary page tables.\n"); rc = -ENOMEM; goto out; } @@ -492,7 +492,7 @@ int swsusp_arch_resume(void) */ zero_page = (void *)get_safe_page(GFP_ATOMIC); if (!zero_page) { - pr_err("Failed to allocate zero page."); + pr_err("Failed to allocate zero page.\n"); rc = -ENOMEM; goto out; } @@ -512,7 +512,7 @@ int swsusp_arch_resume(void) &phys_hibernate_exit, (void *)get_safe_page, GFP_ATOMIC); if (rc) { - pr_err("Failed to create safe executable page for hibernate_exit code."); + pr_err("Failed to create safe executable page for hibernate_exit code.\n"); goto out; } From 510224c2b10aba2149c0d24595be98f2254c641a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 9 Jan 2017 14:31:55 +0000 Subject: [PATCH 03/74] arm64: head.S: fix up stale comments In commit 23c8a500c24d02dd ("arm64: kernel: use ordinary return/argument register for el2_setup()"), we stopped using w20 as a global stash of the boot mode flag, and instead pass this around in w0 as a function parameter. Unfortunately, we missed a couple of comments, which still refer to the old convention of using w20/x20. This patch fixes up the comments to describe the code as it currently works. Signed-off-by: Mark Rutland Acked-by: Ard Biesheuvel Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 4b1abac3485a..9b0857a89a2a 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -483,7 +483,7 @@ ENTRY(kimage_vaddr) * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. * - * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if + * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if * booted in EL1 or EL2 respectively. */ ENTRY(el2_setup) @@ -628,7 +628,7 @@ ENDPROC(el2_setup) /* * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed - * in x20. See arch/arm64/include/asm/virt.h for more info. + * in w0. See arch/arm64/include/asm/virt.h for more info. */ set_cpu_boot_mode_flag: adr_l x1, __boot_cpu_mode From c8b06e3fddddaae1a87ed479edcb8b3d85caecc7 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 9 Jan 2017 18:14:02 +0000 Subject: [PATCH 04/74] arm64: Remove useless UAO IPI and describe how this gets enabled Since its introduction, the UAO enable call was broken, and useless. commit 2a6dcb2b5f3e ("arm64: cpufeature: Schedule enable() calls instead of calling them via IPI"), fixed the framework so that these calls are scheduled, so that they can modify PSTATE. Now it is just useless. Remove it. UAO is enabled by the code patching which causes get_user() and friends to use the 'ldtr' family of instructions. This relies on the PSTATE.UAO bit being set to match addr_limit, which we do in uao_thread_switch() called via __switch_to(). All that is needed to enable UAO is patch the code, and call schedule(). __apply_alternatives_multi_stop() calls stop_machine() when it modifies the kernel text to enable the alternatives, (including the UAO code in uao_thread_switch()). Once stop_machine() has finished __switch_to() is called to reschedule the original task, this causes PSTATE.UAO to be set appropriately. An explicit enable() call is not needed. Reported-by: Vladimir Murzin Signed-off-by: James Morse --- arch/arm64/include/asm/processor.h | 1 - arch/arm64/kernel/cpufeature.c | 5 ++++- arch/arm64/mm/fault.c | 14 -------------- 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 747c65a616ed..c97b8bd2acba 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -187,7 +187,6 @@ static inline void spin_lock_prefetch(const void *ptr) #endif int cpu_enable_pan(void *__unused); -int cpu_enable_uao(void *__unused); int cpu_enable_cache_maint_trap(void *__unused); #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index fdf8f045929f..99f56983c415 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -806,7 +806,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64MMFR2_EL1, .field_pos = ID_AA64MMFR2_UAO_SHIFT, .min_field_value = 1, - .enable = cpu_enable_uao, + /* + * We rely on stop_machine() calling uao_thread_switch() to set + * UAO immediately after patching. + */ }, #endif /* CONFIG_ARM64_UAO */ #ifdef CONFIG_ARM64_PAN diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 156169c6981b..81283851c9af 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -691,17 +691,3 @@ int cpu_enable_pan(void *__unused) return 0; } #endif /* CONFIG_ARM64_PAN */ - -#ifdef CONFIG_ARM64_UAO -/* - * Kernel threads have fs=KERNEL_DS by default, and don't need to call - * set_fs(), devtmpfs in particular relies on this behaviour. - * We need to enable the feature at runtime (instead of adding it to - * PSR_MODE_EL1h) as the feature may not be implemented by the cpu. - */ -int cpu_enable_uao(void *__unused) -{ - asm(SET_PSTATE_UAO(1)); - return 0; -} -#endif /* CONFIG_ARM64_UAO */ From b20d1ba3cf4b99dc47ea3e1102399e4728386783 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 25 Jul 2016 16:17:52 +0100 Subject: [PATCH 05/74] arm64: cpufeature: allow for version discrepancy in PMU implementations Perf already supports multiple PMU instances for heterogeneous systems, so there's no need to be strict in the cpufeature checking, particularly as the PMU extension is optional in the architecture. Acked-by: Mark Rutland Reviewed-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 99f56983c415..d9714adb6a2a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -184,7 +184,11 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0), - S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0), + /* + * We can instantiate multiple PMU instances with different levels + * of support. + * */ + S_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6), ARM64_FTR_END, From f31deaadff0d81a4963b5d816c1ae4a67296aa0c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 22 Sep 2016 11:23:07 +0100 Subject: [PATCH 06/74] arm64: cpufeature: Don't enforce system-wide SPE capability The statistical profiling extension (SPE) is an optional feature of ARMv8.1 and is unlikely to be supported by all of the CPUs in a heterogeneous system. This patch updates the cpufeature checks so that such systems are not tainted as unsupported. Acked-by: Mark Rutland Reviewed-by: Suzuki Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/kernel/cpufeature.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 98ae03f8eedd..e156e7793a65 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -190,6 +190,7 @@ #define ID_AA64MMFR2_CNP_SHIFT 0 /* id_aa64dfr0 */ +#define ID_AA64DFR0_PMSVER_SHIFT 32 #define ID_AA64DFR0_CTX_CMPS_SHIFT 28 #define ID_AA64DFR0_WRPS_SHIFT 20 #define ID_AA64DFR0_BRPS_SHIFT 12 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d9714adb6a2a..e8f6dfba1973 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -180,7 +180,8 @@ static const struct arm64_ftr_bits ftr_id_mmfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), + ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 36, 28, 0), + ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVER_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0), From b389d7997acb9c95331322c54ebf45a3bb97ff7b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 9 Jan 2017 17:28:24 +0000 Subject: [PATCH 07/74] arm64: cpufeature: treat unknown fields as RES0 Any fields not defined in an arm64_ftr_bits entry are propagated to the system-wide register value in init_cpu_ftr_reg(), and while we require that these strictly match for the sanity checks, we don't update them in update_cpu_ftr_reg(). Generally, the lack of an arm64_ftr_bits entry indicates that the bits are currently RES0 (as is the case for the upper 32 bits of all supposedly 32-bit registers). A better default would be to use zero for the system-wide value of unallocated bits, making all register checking consistent, and allowing for subsequent simplifications to the arm64_ftr_bits arrays. This patch updates init_cpu_ftr_reg() to treat unallocated bits as RES0 for the purpose of the system-wide safe value. These bits will still be sanity checked with strict match requirements, as is currently the case. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Reviewed-by: Suzuki K Poulose Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e8f6dfba1973..9cc198e81613 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -415,23 +415,33 @@ static void __init sort_ftr_regs(void) /* * Initialise the CPU feature register from Boot CPU values. * Also initiliases the strict_mask for the register. + * Any bits that are not covered by an arm64_ftr_bits entry are considered + * RES0 for the system-wide value, and must strictly match. */ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) { u64 val = 0; u64 strict_mask = ~0x0ULL; + u64 valid_mask = 0; + const struct arm64_ftr_bits *ftrp; struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg); BUG_ON(!reg); for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { + u64 ftr_mask = arm64_ftr_mask(ftrp); s64 ftr_new = arm64_ftr_value(ftrp, new); val = arm64_ftr_set_value(ftrp, val, ftr_new); + + valid_mask |= ftr_mask; if (!ftrp->strict) - strict_mask &= ~arm64_ftr_mask(ftrp); + strict_mask &= ~ftr_mask; } + + val &= valid_mask; + reg->sys_val = val; reg->strict_mask = strict_mask; } From 564279ff6f1f4c5eba95f4addd6acd7b1531a653 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 9 Jan 2017 17:28:25 +0000 Subject: [PATCH 08/74] arm64: cpufeature: remove explicit RAZ fields We currently have some RAZ fields described explicitly in our arm64_ftr_bits arrays. These are inconsistently commented, grouped, and/or applied, and maintaining these is error-prone. Luckily, we don't need these at all. We'll never need to inspect RAZ fields to determine feature support, and init_cpu_ftr_reg() will ensure that any bits without a corresponding arm64_ftr_bits entry are treated as RES0 with strict matching requirements. In check_update_ftr_reg() we'll then compare these bits from the relevant cpuinfo_arm64 structures, and need not store them in a arm64_ftr_reg. This patch removes the unnecessary arm64_ftr_bits entries for RES0 bits. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Reviewed-by: Suzuki K Poulose Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9cc198e81613..09c33722fe60 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -81,21 +81,16 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */ ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), @@ -108,7 +103,6 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI), @@ -126,7 +120,6 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0), @@ -147,7 +140,6 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ @@ -157,7 +149,6 @@ static const struct arm64_ftr_bits ftr_ctr[] = { * If we have differing I-cache policies, report it as the weakest - AIVIVT. */ ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_AIVIVT), /* L1Ip */ - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ ARM64_FTR_END, }; @@ -196,14 +187,12 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { }; static const struct arm64_ftr_bits ftr_mvfr2[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* FPMisc */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* SIMDMisc */ ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_dczid[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 5, 27, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */ ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */ ARM64_FTR_END, @@ -212,7 +201,6 @@ static const struct arm64_ftr_bits ftr_dczid[] = { static const struct arm64_ftr_bits ftr_id_isar5[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 20, 4, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0), @@ -222,14 +210,11 @@ static const struct arm64_ftr_bits ftr_id_isar5[] = { }; static const struct arm64_ftr_bits ftr_id_mmfr4[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* ac2 */ - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* RAZ */ ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_pfr0[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 16, 0), /* RAZ */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* State3 */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* State2 */ ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* State1 */ From eab43e88734f4ef8cb5ec40827bf784400ed8ffc Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 9 Jan 2017 17:28:26 +0000 Subject: [PATCH 09/74] arm64: cpufeature: Cleanup feature bit tables This patch does the following clean ups : 1) All undescribed fields of a register are now treated as 'strict' with a safe value of 0. Hence we could leave an empty table for describing registers which are RAZ. 2) ID_AA64DFR1_EL1 is RAZ and should use the table for RAZ register. 3) ftr_generic32 is used to represent a register with a 32bit feature value. Rename this to ftr_singl32 to make it more obvious. Since we don't have a 64bit singe feature register, kill ftr_generic. Based on a patch by Mark Rutland. Cc: Catalin Marinas Cc: Will Deacon Reviewed-by: Mark Rutland Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 09c33722fe60..06577bac4ccd 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -252,18 +252,13 @@ static const struct arm64_ftr_bits ftr_generic_32bits[] = { ARM64_FTR_END, }; -static const struct arm64_ftr_bits ftr_generic[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0), - ARM64_FTR_END, -}; - -static const struct arm64_ftr_bits ftr_generic32[] = { +/* Table for a single 32bit feature value */ +static const struct arm64_ftr_bits ftr_single32[] = { ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 32, 0), ARM64_FTR_END, }; -static const struct arm64_ftr_bits ftr_aa64raz[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0), +static const struct arm64_ftr_bits ftr_raz[] = { ARM64_FTR_END, }; @@ -304,15 +299,15 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 4 */ ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), - ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_aa64raz), + ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_raz), /* Op1 = 0, CRn = 0, CRm = 5 */ ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0), - ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_generic), + ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_raz), /* Op1 = 0, CRn = 0, CRm = 6 */ ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), - ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_aa64raz), + ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_raz), /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), @@ -324,7 +319,7 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid), /* Op1 = 3, CRn = 14, CRm = 0 */ - ARM64_FTR_REG(SYS_CNTFRQ_EL0, ftr_generic32), + ARM64_FTR_REG(SYS_CNTFRQ_EL0, ftr_single32), }; static int search_cmp_ftr_reg(const void *id, const void *regp) From 156e0d57f8bc9a4c0f09f4f79af7b51c6d7d0600 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 9 Jan 2017 17:28:27 +0000 Subject: [PATCH 10/74] arm64: cpufeature: Document the rules of safe value for features Document the rules for choosing the safe value for different types of features. Cc: Dave Martin Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index b4989df48670..10d56242e649 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -29,7 +29,20 @@ #include #include -/* CPU feature register tracking */ +/* + * CPU feature register tracking + * + * The safe value of a CPUID feature field is dependent on the implications + * of the values assigned to it by the architecture. Based on the relationship + * between the values, the features are classified into 3 types - LOWER_SAFE, + * HIGHER_SAFE and EXACT. + * + * The lowest value of all the CPUs is chosen for LOWER_SAFE and highest + * for HIGHER_SAFE. It is expected that all CPUs have the same value for + * a field when EXACT is specified, failing which, the safe value specified + * in the table is chosen. + */ + enum ftr_type { FTR_EXACT, /* Use a predefined safe value */ FTR_LOWER_SAFE, /* Smaller value is safe */ From c9ee0f98662a6e358b5f2969755c938ce9c3a63a Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 9 Jan 2017 17:28:28 +0000 Subject: [PATCH 11/74] arm64: cpufeature: Define helpers for sys_reg id Define helper macros to extract op0, op1, CRn, CRm & op2 for a given sys_reg id. While at it remove the explicit masking only used for Op0. Cc: Catalin Marinas Cc: Mark Rutland Cc: Will Deacon Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e156e7793a65..36ae882e2dcc 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -32,8 +32,27 @@ * [11-8] : CRm * [7-5] : Op2 */ +#define Op0_shift 19 +#define Op0_mask 0x3 +#define Op1_shift 16 +#define Op1_mask 0x7 +#define CRn_shift 12 +#define CRn_mask 0xf +#define CRm_shift 8 +#define CRm_mask 0xf +#define Op2_shift 5 +#define Op2_mask 0x7 + #define sys_reg(op0, op1, crn, crm, op2) \ - ((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5)) + (((op0) << Op0_shift) | ((op1) << Op1_shift) | \ + ((crn) << CRn_shift) | ((crm) << CRm_shift) | \ + ((op2) << Op2_shift)) + +#define sys_reg_Op0(id) (((id) >> Op0_shift) & Op0_mask) +#define sys_reg_Op1(id) (((id) >> Op1_shift) & Op1_mask) +#define sys_reg_CRn(id) (((id) >> CRn_shift) & CRn_mask) +#define sys_reg_CRm(id) (((id) >> CRm_shift) & CRm_mask) +#define sys_reg_Op2(id) (((id) >> Op2_shift) & Op2_mask) #ifndef CONFIG_BROKEN_GAS_INST From 8c2dcbd2c4443bad0b4242fb62baa47b260b8f79 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 9 Jan 2017 17:28:29 +0000 Subject: [PATCH 12/74] arm64: Add helper to decode register from instruction Add a helper to extract the register field from a given instruction. Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/insn.h | 2 ++ arch/arm64/kernel/insn.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index bc853663dd51..aecc07e09a18 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -332,6 +332,8 @@ bool aarch64_insn_is_branch(u32 insn); u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn); u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, u32 insn, u64 imm); +u32 aarch64_insn_decode_register(enum aarch64_insn_register_type type, + u32 insn); u32 aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, enum aarch64_insn_branch_type type); u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr, diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 94b62c1fa4df..1f44cf8c73e6 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -417,6 +417,35 @@ u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, return insn; } +u32 aarch64_insn_decode_register(enum aarch64_insn_register_type type, + u32 insn) +{ + int shift; + + switch (type) { + case AARCH64_INSN_REGTYPE_RT: + case AARCH64_INSN_REGTYPE_RD: + shift = 0; + break; + case AARCH64_INSN_REGTYPE_RN: + shift = 5; + break; + case AARCH64_INSN_REGTYPE_RT2: + case AARCH64_INSN_REGTYPE_RA: + shift = 10; + break; + case AARCH64_INSN_REGTYPE_RM: + shift = 16; + break; + default: + pr_err("%s: unknown register type encoding %d\n", __func__, + type); + return 0; + } + + return (insn >> shift) & GENMASK(4, 0); +} + static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type, u32 insn, enum aarch64_insn_register reg) From fe4fbdbcddeaab58a4f9b5297f28b8a4babf6f1f Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 9 Jan 2017 17:28:30 +0000 Subject: [PATCH 13/74] arm64: cpufeature: Track user visible fields Track the user visible fields of a CPU feature register. This will be used for exposing the value to the userspace. All the user visible fields of a feature register will be passed on as it is, while the others would be filled with their respective safe value. Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 11 ++ arch/arm64/kernel/cpufeature.c | 193 +++++++++++++++------------- arch/arm64/kernel/traps.c | 2 +- 3 files changed, 113 insertions(+), 93 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 10d56242e649..4ce82ed3e7c3 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -55,8 +55,12 @@ enum ftr_type { #define FTR_SIGNED true /* Value should be treated as signed */ #define FTR_UNSIGNED false /* Value should be treated as unsigned */ +#define FTR_VISIBLE true /* Feature visible to the user space */ +#define FTR_HIDDEN false /* Feature is hidden from the user */ + struct arm64_ftr_bits { bool sign; /* Value is signed ? */ + bool visible; bool strict; /* CPU Sanity check: strict matching required ? */ enum ftr_type type; u8 shift; @@ -72,7 +76,9 @@ struct arm64_ftr_bits { struct arm64_ftr_reg { const char *name; u64 strict_mask; + u64 user_mask; u64 sys_val; + u64 user_val; const struct arm64_ftr_bits *ftr_bits; }; @@ -172,6 +178,11 @@ static inline u64 arm64_ftr_mask(const struct arm64_ftr_bits *ftrp) return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift); } +static inline u64 arm64_ftr_reg_user_value(const struct arm64_ftr_reg *reg) +{ + return (reg->user_val | (reg->sys_val & reg->user_mask)); +} + static inline int __attribute_const__ cpuid_feature_extract_field(u64 features, int field, bool sign) { diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 06577bac4ccd..9de44807bd34 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -52,9 +52,10 @@ EXPORT_SYMBOL(cpu_hwcaps); DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS); EXPORT_SYMBOL(cpu_hwcap_keys); -#define __ARM64_FTR_BITS(SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ +#define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ { \ .sign = SIGNED, \ + .visible = VISIBLE, \ .strict = STRICT, \ .type = TYPE, \ .shift = SHIFT, \ @@ -63,12 +64,12 @@ EXPORT_SYMBOL(cpu_hwcap_keys); } /* Define a feature with unsigned values */ -#define ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ - __ARM64_FTR_BITS(FTR_UNSIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) +#define ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ + __ARM64_FTR_BITS(FTR_UNSIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) /* Define a feature with a signed value */ -#define S_ARM64_FTR_BITS(STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ - __ARM64_FTR_BITS(FTR_SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) +#define S_ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ + __ARM64_FTR_BITS(FTR_SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) #define ARM64_FTR_END \ { \ @@ -81,75 +82,75 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_AES_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), - S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), - S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), + S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), + S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), /* Linux doesn't care about the EL3 */ - ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { - S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), - S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0), /* Linux shouldn't care about secure memory */ - ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0), /* * Differing PARange is fine as long as all peripherals and memory are mapped * within the minimum PARange of all CPUs */ - ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_PARANGE_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_CNP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_CNP_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_ctr[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ - ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */ - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RAO */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0), /* CWG */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), /* ERG */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1), /* DminLine */ /* * Linux can handle differing I-cache policies. Userspace JITs will * make use of *minLine. * If we have differing I-cache policies, report it as the weakest - AIVIVT. */ - ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_AIVIVT), /* L1Ip */ - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_AIVIVT), /* L1Ip */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* IminLine */ ARM64_FTR_END, }; @@ -159,78 +160,78 @@ struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = { }; static const struct arm64_ftr_bits ftr_id_mmfr0[] = { - S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0xf), /* InnerShr */ - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */ - ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */ - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 4, 0), /* TCM */ - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* ShareLvl */ - S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0xf), /* OuterShr */ - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */ - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */ + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 28, 4, 0xf), /* InnerShr */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 16, 4, 0), /* TCM */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 12, 4, 0), /* ShareLvl */ + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 8, 4, 0xf), /* OuterShr */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */ ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 36, 28, 0), - ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVER_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 36, 28, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_BRPS_SHIFT, 4, 0), /* * We can instantiate multiple PMU instances with different levels * of support. - * */ - S_ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6), + */ + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_mvfr2[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* FPMisc */ - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* SIMDMisc */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* FPMisc */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0), /* SIMDMisc */ ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_dczid[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */ - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 4, 1, 1), /* DZP */ + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* BS */ ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_isar5[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_mmfr4[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* ac2 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* ac2 */ ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_pfr0[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0), /* State3 */ - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0), /* State2 */ - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0), /* State1 */ - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0), /* State0 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 12, 4, 0), /* State3 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 8, 4, 0), /* State2 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* State1 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0), /* State0 */ ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_dfr0[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), - S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */ - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), ARM64_FTR_END, }; @@ -241,20 +242,20 @@ static const struct arm64_ftr_bits ftr_id_dfr0[] = { * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1] */ static const struct arm64_ftr_bits ftr_generic_32bits[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), - ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), ARM64_FTR_END, }; /* Table for a single 32bit feature value */ static const struct arm64_ftr_bits ftr_single32[] = { - ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 32, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 32, 0), ARM64_FTR_END, }; @@ -402,6 +403,7 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) { u64 val = 0; u64 strict_mask = ~0x0ULL; + u64 user_mask = 0; u64 valid_mask = 0; const struct arm64_ftr_bits *ftrp; @@ -418,12 +420,19 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) valid_mask |= ftr_mask; if (!ftrp->strict) strict_mask &= ~ftr_mask; + if (ftrp->visible) + user_mask |= ftr_mask; + else + reg->user_val = arm64_ftr_set_value(ftrp, + reg->user_val, + ftrp->safe_val); } val &= valid_mask; reg->sys_val = val; reg->strict_mask = strict_mask; + reg->user_mask = user_mask; } void __init init_cpu_features(struct cpuinfo_arm64 *info) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 5b830be79c01..8187229eb802 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -496,7 +496,7 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) { int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; - regs->regs[rt] = arm64_ftr_reg_ctrel0.sys_val; + regs->regs[rt] = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); regs->pc += 4; } From fa5b6ec9e5274aeae2326e25995506a953e5f878 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:40 -0800 Subject: [PATCH 14/74] lib/Kconfig.debug: Add ARCH_HAS_DEBUG_VIRTUAL DEBUG_VIRTUAL currently depends on DEBUG_KERNEL && X86. arm64 is getting the same support. Rather than add a list of architectures, switch this to ARCH_HAS_DEBUG_VIRTUAL and let architectures select it as appropriate. Acked-by: Ingo Molnar Reviewed-by: Mark Rutland Tested-by: Mark Rutland Suggested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/x86/Kconfig | 1 + lib/Kconfig.debug | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e487493bbd47..f1d4e8f2131f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -46,6 +46,7 @@ config X86 select ARCH_CLOCKSOURCE_DATA select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI + select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b06848a104e6..2aed31608b86 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -622,9 +622,12 @@ config DEBUG_VM_PGFLAGS If unsure, say N. +config ARCH_HAS_DEBUG_VIRTUAL + bool + config DEBUG_VIRTUAL bool "Debug VM translations" - depends on DEBUG_KERNEL && X86 + depends on DEBUG_KERNEL && ARCH_HAS_DEBUG_VIRTUAL help Enable some costly sanity checks in virtual to page code. This can catch mistakes with virt_to_page() and friends. From 2dece445b6dbdaa3d94f38ef44aa1b63bc2a6bb9 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:41 -0800 Subject: [PATCH 15/74] mm/cma: Cleanup highmem check 6b101e2a3ce4 ("mm/CMA: fix boot regression due to physical address of high_memory") added checks to use __pa_nodebug on x86 since CONFIG_DEBUG_VIRTUAL complains about high_memory not being linearlly mapped. arm64 is now getting support for CONFIG_DEBUG_VIRTUAL as well. Rather than add an explosion of arches to the #ifdef, switch to an alternate method to calculate the physical start of highmem using the page before highmem starts. This avoids the need for the #ifdef and extra __pa_nodebug calls. Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- mm/cma.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/mm/cma.c b/mm/cma.c index c960459eda7e..94b3460cd608 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -235,18 +235,13 @@ int __init cma_declare_contiguous(phys_addr_t base, phys_addr_t highmem_start; int ret = 0; -#ifdef CONFIG_X86 /* - * high_memory isn't direct mapped memory so retrieving its physical - * address isn't appropriate. But it would be useful to check the - * physical address of the highmem boundary so it's justifiable to get - * the physical address from it. On x86 there is a validation check for - * this case, so the following workaround is needed to avoid it. + * We can't use __pa(high_memory) directly, since high_memory + * isn't a valid direct map VA, and DEBUG_VIRTUAL will (validly) + * complain. Find the boundary by adding one to the last valid + * address. */ - highmem_start = __pa_nodebug(high_memory); -#else - highmem_start = __pa(high_memory); -#endif + highmem_start = __pa(high_memory - 1) + 1; pr_debug("%s(size %pa, base %pa, limit %pa alignment %pa)\n", __func__, &size, &base, &limit, &alignment); From 568c5fe5a54f2654f5a4c599c45b8a62ed9a2013 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:42 -0800 Subject: [PATCH 16/74] mm: Introduce lm_alias Certain architectures may have the kernel image mapped separately to alias the linear map. Introduce a macro lm_alias to translate a kernel image symbol into its linear alias. This is used in part with work to add CONFIG_DEBUG_VIRTUAL support for arm64. Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- include/linux/mm.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index fe6b4036664a..5dc9c4650522 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -76,6 +76,10 @@ extern int mmap_rnd_compat_bits __read_mostly; #define page_to_virt(x) __va(PFN_PHYS(page_to_pfn(x))) #endif +#ifndef lm_alias +#define lm_alias(x) __va(__pa_symbol(x)) +#endif + /* * To prevent common memory management code establishing * a zero page mapping on a read fault. From b6e92aa81038ce57d298a87b4c44285a1d456c3e Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:43 -0800 Subject: [PATCH 17/74] kexec: Switch to __pa_symbol __pa_symbol is the correct api to get the physical address of kernel symbols. Switch to it to allow for better debug checking. Reviewed-by: Mark Rutland Tested-by: Mark Rutland Acked-by: "Eric W. Biederman" Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- kernel/kexec_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 5617cc412444..a01974e1bf6b 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1399,7 +1399,7 @@ void __weak arch_crash_save_vmcoreinfo(void) phys_addr_t __weak paddr_vmcoreinfo_note(void) { - return __pa((unsigned long)(char *)&vmcoreinfo_note); + return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note); } static int __init crash_save_vmcoreinfo_init(void) From 5c6a84a3f4558a6115fef1b59343c7ae56b3abc3 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:44 -0800 Subject: [PATCH 18/74] mm/kasan: Switch to using __pa_symbol and lm_alias __pa_symbol is the correct API to find the physical address of symbols. Switch to it to allow for debugging APIs to work correctly. Other functions such as p*d_populate may call __pa internally. Ensure that the address passed is in the linear region by calling lm_alias. Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- mm/kasan/kasan_init.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c index 3f9a41cf0ac6..31238dad85fb 100644 --- a/mm/kasan/kasan_init.c +++ b/mm/kasan/kasan_init.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -49,7 +50,7 @@ static void __init zero_pte_populate(pmd_t *pmd, unsigned long addr, pte_t *pte = pte_offset_kernel(pmd, addr); pte_t zero_pte; - zero_pte = pfn_pte(PFN_DOWN(__pa(kasan_zero_page)), PAGE_KERNEL); + zero_pte = pfn_pte(PFN_DOWN(__pa_symbol(kasan_zero_page)), PAGE_KERNEL); zero_pte = pte_wrprotect(zero_pte); while (addr + PAGE_SIZE <= end) { @@ -69,7 +70,7 @@ static void __init zero_pmd_populate(pud_t *pud, unsigned long addr, next = pmd_addr_end(addr, end); if (IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) { - pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte); + pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte)); continue; } @@ -92,9 +93,9 @@ static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr, if (IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) { pmd_t *pmd; - pud_populate(&init_mm, pud, kasan_zero_pmd); + pud_populate(&init_mm, pud, lm_alias(kasan_zero_pmd)); pmd = pmd_offset(pud, addr); - pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte); + pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte)); continue; } @@ -135,11 +136,11 @@ void __init kasan_populate_zero_shadow(const void *shadow_start, * puds,pmds, so pgd_populate(), pud_populate() * is noops. */ - pgd_populate(&init_mm, pgd, kasan_zero_pud); + pgd_populate(&init_mm, pgd, lm_alias(kasan_zero_pud)); pud = pud_offset(pgd, addr); - pud_populate(&init_mm, pud, kasan_zero_pmd); + pud_populate(&init_mm, pud, lm_alias(kasan_zero_pmd)); pmd = pmd_offset(pud, addr); - pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte); + pmd_populate_kernel(&init_mm, pmd, lm_alias(kasan_zero_pte)); continue; } From 46f6236aa1c48e06b10a2fe0386ac4114b438622 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:45 -0800 Subject: [PATCH 19/74] mm/usercopy: Switch to using lm_alias The usercopy checking code currently calls __va(__pa(...)) to check for aliases on symbols. Switch to using lm_alias instead. Reviewed-by: Mark Rutland Tested-by: Mark Rutland Acked-by: Kees Cook Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- mm/usercopy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index 3c8da0af9695..8345299e3e3b 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -108,13 +108,13 @@ static inline const char *check_kernel_text_object(const void *ptr, * __pa() is not just the reverse of __va(). This can be detected * and checked: */ - textlow_linear = (unsigned long)__va(__pa(textlow)); + textlow_linear = (unsigned long)lm_alias(textlow); /* No different mapping: we're done. */ if (textlow_linear == textlow) return NULL; /* Check the secondary mapping... */ - texthigh_linear = (unsigned long)__va(__pa(texthigh)); + texthigh_linear = (unsigned long)lm_alias(texthigh); if (overlaps(ptr, n, textlow_linear, texthigh_linear)) return ""; From 1a08e3d9e0ac4577ba89dbdb38f593fe050f88fc Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:46 -0800 Subject: [PATCH 20/74] drivers: firmware: psci: Use __pa_symbol for kernel symbol __pa_symbol is technically the macro that should be used for kernel symbols. Switch to this as a pre-requisite for DEBUG_VIRTUAL which will do bounds checking. Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- drivers/firmware/psci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c index 6c60a5087caf..66a8793f3b37 100644 --- a/drivers/firmware/psci.c +++ b/drivers/firmware/psci.c @@ -383,7 +383,7 @@ static int psci_suspend_finisher(unsigned long index) u32 *state = __this_cpu_read(psci_power_state); return psci_ops.cpu_suspend(state[index - 1], - virt_to_phys(cpu_resume)); + __pa_symbol(cpu_resume)); } int psci_cpu_suspend_enter(unsigned long index) From 77c97b4ee21290f5f083173d957843b615abbff2 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 9 Jan 2017 17:28:31 +0000 Subject: [PATCH 21/74] arm64: cpufeature: Expose CPUID registers by emulation This patch adds the hook for emulating MRS instruction to export the 'user visible' value of supported system registers. We emulate only the following id space for system registers: Op0=3, Op1=0, CRn=0, CRm=[0, 4-7] The rest will fall back to SIGILL. This capability is also advertised via a new HWCAP_CPUID. Cc: Catalin Marinas Cc: Mark Rutland Cc: Will Deacon Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose [will: add missing static keyword to enable_mrs_emulation] Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 4 ++ arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/cpufeature.c | 101 ++++++++++++++++++++++++++++ arch/arm64/kernel/cpuinfo.c | 1 + 4 files changed, 107 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 36ae882e2dcc..ac24b6e798b1 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -265,6 +265,10 @@ #define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED #endif + +/* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ +#define SYS_MPIDR_SAFE_VAL (1UL << 31) + #ifdef __ASSEMBLY__ .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index a739287ef6a3..773c90b05458 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -30,5 +30,6 @@ #define HWCAP_ATOMICS (1 << 8) #define HWCAP_FPHP (1 << 9) #define HWCAP_ASIMDHP (1 << 10) +#define HWCAP_CPUID (1 << 11) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9de44807bd34..8abe6ea90793 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -29,6 +29,7 @@ #include #include #include +#include #include unsigned long elf_hwcap __read_mostly; @@ -940,6 +941,8 @@ static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap) static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps) { + /* We support emulation of accesses to CPU ID feature registers */ + elf_hwcap |= HWCAP_CPUID; for (; hwcaps->matches; hwcaps++) if (hwcaps->matches(hwcaps, hwcaps->def_scope)) cap_set_elf_hwcap(hwcaps); @@ -1127,3 +1130,101 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) { return (cpus_have_const_cap(ARM64_HAS_PAN) && !cpus_have_const_cap(ARM64_HAS_UAO)); } + +/* + * We emulate only the following system register space. + * Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 4 - 7] + * See Table C5-6 System instruction encodings for System register accesses, + * ARMv8 ARM(ARM DDI 0487A.f) for more details. + */ +static inline bool __attribute_const__ is_emulated(u32 id) +{ + return (sys_reg_Op0(id) == 0x3 && + sys_reg_CRn(id) == 0x0 && + sys_reg_Op1(id) == 0x0 && + (sys_reg_CRm(id) == 0 || + ((sys_reg_CRm(id) >= 4) && (sys_reg_CRm(id) <= 7)))); +} + +/* + * With CRm == 0, reg should be one of : + * MIDR_EL1, MPIDR_EL1 or REVIDR_EL1. + */ +static inline int emulate_id_reg(u32 id, u64 *valp) +{ + switch (id) { + case SYS_MIDR_EL1: + *valp = read_cpuid_id(); + break; + case SYS_MPIDR_EL1: + *valp = SYS_MPIDR_SAFE_VAL; + break; + case SYS_REVIDR_EL1: + /* IMPLEMENTATION DEFINED values are emulated with 0 */ + *valp = 0; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int emulate_sys_reg(u32 id, u64 *valp) +{ + struct arm64_ftr_reg *regp; + + if (!is_emulated(id)) + return -EINVAL; + + if (sys_reg_CRm(id) == 0) + return emulate_id_reg(id, valp); + + regp = get_arm64_ftr_reg(id); + if (regp) + *valp = arm64_ftr_reg_user_value(regp); + else + /* + * The untracked registers are either IMPLEMENTATION DEFINED + * (e.g, ID_AFR0_EL1) or reserved RAZ. + */ + *valp = 0; + return 0; +} + +static int emulate_mrs(struct pt_regs *regs, u32 insn) +{ + int rc; + u32 sys_reg, dst; + u64 val; + + /* + * sys_reg values are defined as used in mrs/msr instruction. + * shift the imm value to get the encoding. + */ + sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5; + rc = emulate_sys_reg(sys_reg, &val); + if (!rc) { + dst = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); + regs->user_regs.regs[dst] = val; + regs->pc += 4; + } + + return rc; +} + +static struct undef_hook mrs_hook = { + .instr_mask = 0xfff00000, + .instr_val = 0xd5300000, + .pstate_mask = COMPAT_PSR_MODE_MASK, + .pstate_val = PSR_MODE_EL0t, + .fn = emulate_mrs, +}; + +static int __init enable_mrs_emulation(void) +{ + register_undef_hook(&mrs_hook); + return 0; +} + +late_initcall(enable_mrs_emulation); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 7b7be71e87bf..4d44edd2b140 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -63,6 +63,7 @@ static const char *const hwcap_str[] = { "atomics", "fphp", "asimdhp", + "cpuid", NULL }; From 4aa8a472c33f4024381ade56bef008a9c566366a Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Mon, 9 Jan 2017 17:28:32 +0000 Subject: [PATCH 22/74] arm64: Documentation - Expose CPU feature registers Documentation for the infrastructure to expose CPU feature register by emulating MRS. Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Dave Martin Reviewed-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- Documentation/arm64/cpu-feature-registers.txt | 240 ++++++++++++++++++ arch/arm64/kernel/cpufeature.c | 4 + 2 files changed, 244 insertions(+) create mode 100644 Documentation/arm64/cpu-feature-registers.txt diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt new file mode 100644 index 000000000000..61ca21ebef1a --- /dev/null +++ b/Documentation/arm64/cpu-feature-registers.txt @@ -0,0 +1,240 @@ + ARM64 CPU Feature Registers + =========================== + +Author: Suzuki K Poulose + + +This file describes the ABI for exporting the AArch64 CPU ID/feature +registers to userspace. The availability of this ABI is advertised +via the HWCAP_CPUID in HWCAPs. + +1. Motivation +--------------- + +The ARM architecture defines a set of feature registers, which describe +the capabilities of the CPU/system. Access to these system registers is +restricted from EL0 and there is no reliable way for an application to +extract this information to make better decisions at runtime. There is +limited information available to the application via HWCAPs, however +there are some issues with their usage. + + a) Any change to the HWCAPs requires an update to userspace (e.g libc) + to detect the new changes, which can take a long time to appear in + distributions. Exposing the registers allows applications to get the + information without requiring updates to the toolchains. + + b) Access to HWCAPs is sometimes limited (e.g prior to libc, or + when ld is initialised at startup time). + + c) HWCAPs cannot represent non-boolean information effectively. The + architecture defines a canonical format for representing features + in the ID registers; this is well defined and is capable of + representing all valid architecture variations. + + +2. Requirements +----------------- + + a) Safety : + Applications should be able to use the information provided by the + infrastructure to run safely across the system. This has greater + implications on a system with heterogeneous CPUs. + The infrastructure exports a value that is safe across all the + available CPU on the system. + + e.g, If at least one CPU doesn't implement CRC32 instructions, while + others do, we should report that the CRC32 is not implemented. + Otherwise an application could crash when scheduled on the CPU + which doesn't support CRC32. + + b) Security : + Applications should only be able to receive information that is + relevant to the normal operation in userspace. Hence, some of the + fields are masked out(i.e, made invisible) and their values are set to + indicate the feature is 'not supported'. See Section 4 for the list + of visible features. Also, the kernel may manipulate the fields + based on what it supports. e.g, If FP is not supported by the + kernel, the values could indicate that the FP is not available + (even when the CPU provides it). + + c) Implementation Defined Features + The infrastructure doesn't expose any register which is + IMPLEMENTATION DEFINED as per ARMv8-A Architecture. + + d) CPU Identification : + MIDR_EL1 is exposed to help identify the processor. On a + heterogeneous system, this could be racy (just like getcpu()). The + process could be migrated to another CPU by the time it uses the + register value, unless the CPU affinity is set. Hence, there is no + guarantee that the value reflects the processor that it is + currently executing on. The REVIDR is not exposed due to this + constraint, as REVIDR makes sense only in conjunction with the + MIDR. Alternately, MIDR_EL1 and REVIDR_EL1 are exposed via sysfs + at: + + /sys/devices/system/cpu/cpu$ID/regs/identification/ + \- midr + \- revidr + +3. Implementation +-------------------- + +The infrastructure is built on the emulation of the 'MRS' instruction. +Accessing a restricted system register from an application generates an +exception and ends up in SIGILL being delivered to the process. +The infrastructure hooks into the exception handler and emulates the +operation if the source belongs to the supported system register space. + +The infrastructure emulates only the following system register space: + Op0=3, Op1=0, CRn=0, CRm=0,4,5,6,7 + +(See Table C5-6 'System instruction encodings for non-Debug System +register accesses' in ARMv8 ARM DDI 0487A.h, for the list of +registers). + +The following rules are applied to the value returned by the +infrastructure: + + a) The value of an 'IMPLEMENTATION DEFINED' field is set to 0. + b) The value of a reserved field is populated with the reserved + value as defined by the architecture. + c) The value of a 'visible' field holds the system wide safe value + for the particular feature (except for MIDR_EL1, see section 4). + d) All other fields (i.e, invisible fields) are set to indicate + the feature is missing (as defined by the architecture). + +4. List of registers with visible features +------------------------------------------- + + 1) ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 + x--------------------------------------------------x + | Name | bits | visible | + |--------------------------------------------------| + | RES0 | [63-32] | n | + |--------------------------------------------------| + | RDM | [31-28] | y | + |--------------------------------------------------| + | ATOMICS | [23-20] | y | + |--------------------------------------------------| + | CRC32 | [19-16] | y | + |--------------------------------------------------| + | SHA2 | [15-12] | y | + |--------------------------------------------------| + | SHA1 | [11-8] | y | + |--------------------------------------------------| + | AES | [7-4] | y | + |--------------------------------------------------| + | RES0 | [3-0] | n | + x--------------------------------------------------x + + + 2) ID_AA64PFR0_EL1 - Processor Feature Register 0 + x--------------------------------------------------x + | Name | bits | visible | + |--------------------------------------------------| + | RES0 | [63-28] | n | + |--------------------------------------------------| + | GIC | [27-24] | n | + |--------------------------------------------------| + | AdvSIMD | [23-20] | y | + |--------------------------------------------------| + | FP | [19-16] | y | + |--------------------------------------------------| + | EL3 | [15-12] | n | + |--------------------------------------------------| + | EL2 | [11-8] | n | + |--------------------------------------------------| + | EL1 | [7-4] | n | + |--------------------------------------------------| + | EL0 | [3-0] | n | + x--------------------------------------------------x + + + 3) MIDR_EL1 - Main ID Register + x--------------------------------------------------x + | Name | bits | visible | + |--------------------------------------------------| + | Implementer | [31-24] | y | + |--------------------------------------------------| + | Variant | [23-20] | y | + |--------------------------------------------------| + | Architecture | [19-16] | y | + |--------------------------------------------------| + | PartNum | [15-4] | y | + |--------------------------------------------------| + | Revision | [3-0] | y | + x--------------------------------------------------x + + NOTE: The 'visible' fields of MIDR_EL1 will contain the value + as available on the CPU where it is fetched and is not a system + wide safe value. + +Appendix I: Example +--------------------------- + +/* + * Sample program to demonstrate the MRS emulation ABI. + * + * Copyright (C) 2015-2016, ARM Ltd + * + * Author: Suzuki K Poulose + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include + +#define get_cpu_ftr(id) ({ \ + unsigned long __val; \ + asm("mrs %0, "#id : "=r" (__val)); \ + printf("%-20s: 0x%016lx\n", #id, __val); \ + }) + +int main(void) +{ + + if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) { + fputs("CPUID registers unavailable\n", stderr); + return 1; + } + + get_cpu_ftr(ID_AA64ISAR0_EL1); + get_cpu_ftr(ID_AA64ISAR1_EL1); + get_cpu_ftr(ID_AA64MMFR0_EL1); + get_cpu_ftr(ID_AA64MMFR1_EL1); + get_cpu_ftr(ID_AA64PFR0_EL1); + get_cpu_ftr(ID_AA64PFR1_EL1); + get_cpu_ftr(ID_AA64DFR0_EL1); + get_cpu_ftr(ID_AA64DFR1_EL1); + + get_cpu_ftr(MIDR_EL1); + get_cpu_ftr(MPIDR_EL1); + get_cpu_ftr(REVIDR_EL1); + +#if 0 + /* Unexposed register access causes SIGILL */ + get_cpu_ftr(ID_MMFR0_EL1); +#endif + + return 0; +} + + + diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 8abe6ea90793..d249f4391078 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -82,6 +82,10 @@ static bool __maybe_unused cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused); +/* + * NOTE: Any changes to the visibility of features should be kept in + * sync with the documentation of the CPU feature register ABI. + */ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0), From 9e22eb616f861a25e3123d1221bad61f8f132913 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:47 -0800 Subject: [PATCH 23/74] arm64: Move some macros under #ifndef __ASSEMBLY__ Several macros for various x_to_y exist outside the bounds of an __ASSEMBLY__ guard. Move them in preparation for support for CONFIG_DEBUG_VIRTUAL. Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 38 ++++++++++++++++----------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index bfe632808d77..f80a8e403847 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -101,25 +101,6 @@ #define KASAN_SHADOW_SIZE (0) #endif -/* - * Physical vs virtual RAM address space conversion. These are - * private definitions which should NOT be used outside memory.h - * files. Use virt_to_phys/phys_to_virt/__pa/__va instead. - */ -#define __virt_to_phys(x) ({ \ - phys_addr_t __x = (phys_addr_t)(x); \ - __x & BIT(VA_BITS - 1) ? (__x & ~PAGE_OFFSET) + PHYS_OFFSET : \ - (__x - kimage_voffset); }) - -#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET) -#define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset)) - -/* - * Convert a page to/from a physical address - */ -#define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page))) -#define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys))) - /* * Memory types available. */ @@ -186,6 +167,25 @@ static inline unsigned long kaslr_offset(void) */ #define PHYS_PFN_OFFSET (PHYS_OFFSET >> PAGE_SHIFT) +/* + * Physical vs virtual RAM address space conversion. These are + * private definitions which should NOT be used outside memory.h + * files. Use virt_to_phys/phys_to_virt/__pa/__va instead. + */ +#define __virt_to_phys(x) ({ \ + phys_addr_t __x = (phys_addr_t)(x); \ + __x & BIT(VA_BITS - 1) ? (__x & ~PAGE_OFFSET) + PHYS_OFFSET : \ + (__x - kimage_voffset); }) + +#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET) +#define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset)) + +/* + * Convert a page to/from a physical address + */ +#define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page))) +#define phys_to_page(phys) (pfn_to_page(__phys_to_pfn(phys))) + /* * Note: Drivers should NOT use these. They are the wrong * translation for translating DMA addresses. Use the driver From 869dcfd10dfe59484cf14acddf264656d5ee94ea Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:48 -0800 Subject: [PATCH 24/74] arm64: Add cast for virt_to_pfn virt_to_pfn lacks a cast at the top level. Don't rely on __virt_to_phys and explicitly cast to unsigned long. Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f80a8e403847..cd6e3eec0efe 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -209,7 +209,7 @@ static inline void *phys_to_virt(phys_addr_t x) #define __pa(x) __virt_to_phys((unsigned long)(x)) #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys(x)) +#define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys((unsigned long)(x))) /* * virt_to_page(k) convert a _valid_ virtual address to struct page * From 2077be6783b5936c3daa838d8addbb635667927f Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:49 -0800 Subject: [PATCH 25/74] arm64: Use __pa_symbol for kernel symbols __pa_symbol is technically the marcro that should be used for kernel symbols. Switch to this as a pre-requisite for DEBUG_VIRTUAL which will do bounds checking. Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/arm64/include/asm/kvm_mmu.h | 4 +-- arch/arm64/include/asm/memory.h | 1 + arch/arm64/include/asm/mmu_context.h | 6 ++--- arch/arm64/include/asm/pgtable.h | 2 +- arch/arm64/kernel/acpi_parking_protocol.c | 3 ++- arch/arm64/kernel/cpu-reset.h | 2 +- arch/arm64/kernel/cpufeature.c | 3 ++- arch/arm64/kernel/hibernate.c | 20 ++++---------- arch/arm64/kernel/insn.c | 2 +- arch/arm64/kernel/psci.c | 3 ++- arch/arm64/kernel/setup.c | 9 ++++--- arch/arm64/kernel/smp_spin_table.c | 3 ++- arch/arm64/kernel/vdso.c | 8 ++++-- arch/arm64/mm/init.c | 12 +++++---- arch/arm64/mm/kasan_init.c | 22 ++++++++++----- arch/arm64/mm/mmu.c | 33 ++++++++++++++--------- 16 files changed, 76 insertions(+), 57 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 6f72fe8b0e3e..55772c13a375 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -47,7 +47,7 @@ * If the page is in the bottom half, we have to use the top half. If * the page is in the top half, we have to use the bottom half: * - * T = __virt_to_phys(__hyp_idmap_text_start) + * T = __pa_symbol(__hyp_idmap_text_start) * if (T & BIT(VA_BITS - 1)) * HYP_VA_MIN = 0 //idmap in upper half * else @@ -271,7 +271,7 @@ static inline void __kvm_flush_dcache_pud(pud_t pud) kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE); } -#define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x)) +#define kvm_virt_to_phys(x) __pa_symbol(x) void kvm_set_way_flush(struct kvm_vcpu *vcpu); void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index cd6e3eec0efe..0ff237a75c05 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -210,6 +210,7 @@ static inline void *phys_to_virt(phys_addr_t x) #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) #define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys((unsigned long)(x))) +#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) /* * virt_to_page(k) convert a _valid_ virtual address to struct page * diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 0363fe80455c..63e9982daca1 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -45,7 +45,7 @@ static inline void contextidr_thread_switch(struct task_struct *next) */ static inline void cpu_set_reserved_ttbr0(void) { - unsigned long ttbr = virt_to_phys(empty_zero_page); + unsigned long ttbr = __pa_symbol(empty_zero_page); write_sysreg(ttbr, ttbr0_el1); isb(); @@ -114,7 +114,7 @@ static inline void cpu_install_idmap(void) local_flush_tlb_all(); cpu_set_idmap_tcr_t0sz(); - cpu_switch_mm(idmap_pg_dir, &init_mm); + cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm); } /* @@ -129,7 +129,7 @@ static inline void cpu_replace_ttbr1(pgd_t *pgd) phys_addr_t pgd_phys = virt_to_phys(pgd); - replace_phys = (void *)virt_to_phys(idmap_cpu_replace_ttbr1); + replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1); cpu_install_idmap(); replace_phys(pgd_phys); diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index ffbb9a520563..090134c346db 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -52,7 +52,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); * for zero-mapped memory areas etc.. */ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; -#define ZERO_PAGE(vaddr) pfn_to_page(PHYS_PFN(__pa(empty_zero_page))) +#define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page)) #define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c index a32b4011d711..1f5655cd9cc9 100644 --- a/arch/arm64/kernel/acpi_parking_protocol.c +++ b/arch/arm64/kernel/acpi_parking_protocol.c @@ -17,6 +17,7 @@ * along with this program. If not, see . */ #include +#include #include #include @@ -109,7 +110,7 @@ static int acpi_parking_protocol_cpu_boot(unsigned int cpu) * that read this address need to convert this address to the * Boot-Loader's endianness before jumping. */ - writeq_relaxed(__pa(secondary_entry), &mailbox->entry_point); + writeq_relaxed(__pa_symbol(secondary_entry), &mailbox->entry_point); writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id); arch_send_wakeup_ipi_mask(cpumask_of(cpu)); diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h index d4e9ecb264f0..6c2b1b4f57c9 100644 --- a/arch/arm64/kernel/cpu-reset.h +++ b/arch/arm64/kernel/cpu-reset.h @@ -24,7 +24,7 @@ static inline void __noreturn cpu_soft_restart(unsigned long el2_switch, el2_switch = el2_switch && !is_kernel_in_hyp_mode() && is_hyp_mode_available(); - restart = (void *)virt_to_phys(__cpu_soft_restart); + restart = (void *)__pa_symbol(__cpu_soft_restart); cpu_install_idmap(); restart(el2_switch, entry, arg0, arg1, arg2); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d249f4391078..1661750dabfb 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -746,7 +747,7 @@ static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry, int __unused) { - phys_addr_t idmap_addr = virt_to_phys(__hyp_idmap_text_start); + phys_addr_t idmap_addr = __pa_symbol(__hyp_idmap_text_start); /* * Activate the lower HYP offset only if: diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 1b918aaed538..97a7384100f3 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -50,9 +50,6 @@ */ extern int in_suspend; -/* Find a symbols alias in the linear map */ -#define LMADDR(x) phys_to_virt(virt_to_phys(x)) - /* Do we need to reset el2? */ #define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) @@ -102,8 +99,8 @@ static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i) int pfn_is_nosave(unsigned long pfn) { - unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin); - unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1); + unsigned long nosave_begin_pfn = sym_to_pfn(&__nosave_begin); + unsigned long nosave_end_pfn = sym_to_pfn(&__nosave_end - 1); return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn); } @@ -125,12 +122,12 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size) return -EOVERFLOW; arch_hdr_invariants(&hdr->invariants); - hdr->ttbr1_el1 = virt_to_phys(swapper_pg_dir); + hdr->ttbr1_el1 = __pa_symbol(swapper_pg_dir); hdr->reenter_kernel = _cpu_resume; /* We can't use __hyp_get_vectors() because kvm may still be loaded */ if (el2_reset_needed()) - hdr->__hyp_stub_vectors = virt_to_phys(__hyp_stub_vectors); + hdr->__hyp_stub_vectors = __pa_symbol(__hyp_stub_vectors); else hdr->__hyp_stub_vectors = 0; @@ -460,7 +457,6 @@ int swsusp_arch_resume(void) void *zero_page; size_t exit_size; pgd_t *tmp_pg_dir; - void *lm_restore_pblist; phys_addr_t phys_hibernate_exit; void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *, void *, phys_addr_t, phys_addr_t); @@ -480,12 +476,6 @@ int swsusp_arch_resume(void) if (rc) goto out; - /* - * Since we only copied the linear map, we need to find restore_pblist's - * linear map address. - */ - lm_restore_pblist = LMADDR(restore_pblist); - /* * We need a zero page that is zero before & after resume in order to * to break before make on the ttbr1 page tables. @@ -537,7 +527,7 @@ int swsusp_arch_resume(void) } hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1, - resume_hdr.reenter_kernel, lm_restore_pblist, + resume_hdr.reenter_kernel, restore_pblist, resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page)); out: diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 1f44cf8c73e6..022d4a9d1738 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -96,7 +96,7 @@ static void __kprobes *patch_map(void *addr, int fixmap) if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX)) page = vmalloc_to_page(addr); else if (!module) - page = pfn_to_page(PHYS_PFN(__pa(addr))); + page = phys_to_page(__pa_symbol(addr)); else return addr; diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 42816bebb1e0..e8edbf13302a 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -45,7 +46,7 @@ static int __init cpu_psci_cpu_prepare(unsigned int cpu) static int cpu_psci_cpu_boot(unsigned int cpu) { - int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry)); + int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa_symbol(secondary_entry)); if (err) pr_err("failed to boot CPU%d (%d)\n", cpu, err); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index b051367e2149..669fc9ff728b 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -199,10 +200,10 @@ static void __init request_standard_resources(void) struct memblock_region *region; struct resource *res; - kernel_code.start = virt_to_phys(_text); - kernel_code.end = virt_to_phys(__init_begin - 1); - kernel_data.start = virt_to_phys(_sdata); - kernel_data.end = virt_to_phys(_end - 1); + kernel_code.start = __pa_symbol(_text); + kernel_code.end = __pa_symbol(__init_begin - 1); + kernel_data.start = __pa_symbol(_sdata); + kernel_data.end = __pa_symbol(_end - 1); for_each_memblock(memory, region) { res = alloc_bootmem_low(sizeof(*res)); diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 9a00eee9acc8..93034651c87e 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -98,7 +99,7 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) * boot-loader's endianess before jumping. This is mandated by * the boot protocol. */ - writeq_relaxed(__pa(secondary_holding_pen), release_addr); + writeq_relaxed(__pa_symbol(secondary_holding_pen), release_addr); __flush_dcache_area((__force void *)release_addr, sizeof(*release_addr)); diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index a2c2478e7d78..41b6e31f8f55 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -123,6 +123,7 @@ static int __init vdso_init(void) { int i; struct page **vdso_pagelist; + unsigned long pfn; if (memcmp(&vdso_start, "\177ELF", 4)) { pr_err("vDSO is not a valid ELF object!\n"); @@ -140,11 +141,14 @@ static int __init vdso_init(void) return -ENOMEM; /* Grab the vDSO data page. */ - vdso_pagelist[0] = pfn_to_page(PHYS_PFN(__pa(vdso_data))); + vdso_pagelist[0] = phys_to_page(__pa_symbol(vdso_data)); + /* Grab the vDSO code pages. */ + pfn = sym_to_pfn(&vdso_start); + for (i = 0; i < vdso_pages; i++) - vdso_pagelist[i + 1] = pfn_to_page(PHYS_PFN(__pa(&vdso_start)) + i); + vdso_pagelist[i + 1] = pfn_to_page(pfn + i); vdso_spec[0].pages = &vdso_pagelist[0]; vdso_spec[1].pages = &vdso_pagelist[1]; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 716d1226ba69..8a2713018f2f 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -209,8 +210,8 @@ void __init arm64_memblock_init(void) * linear mapping. Take care not to clip the kernel which may be * high in memory. */ - memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa(_end)), - ULLONG_MAX); + memblock_remove(max_t(u64, memstart_addr + linear_region_size, + __pa_symbol(_end)), ULLONG_MAX); if (memstart_addr + linear_region_size < memblock_end_of_DRAM()) { /* ensure that memstart_addr remains sufficiently aligned */ memstart_addr = round_up(memblock_end_of_DRAM() - linear_region_size, @@ -225,7 +226,7 @@ void __init arm64_memblock_init(void) */ if (memory_limit != (phys_addr_t)ULLONG_MAX) { memblock_mem_limit_remove_map(memory_limit); - memblock_add(__pa(_text), (u64)(_end - _text)); + memblock_add(__pa_symbol(_text), (u64)(_end - _text)); } if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_start) { @@ -278,7 +279,7 @@ void __init arm64_memblock_init(void) * Register the kernel text, kernel data, initrd, and initial * pagetables with memblock. */ - memblock_reserve(__pa(_text), _end - _text); + memblock_reserve(__pa_symbol(_text), _end - _text); #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start) { memblock_reserve(initrd_start, initrd_end - initrd_start); @@ -484,7 +485,8 @@ void __init mem_init(void) void free_initmem(void) { - free_reserved_area(__va(__pa(__init_begin)), __va(__pa(__init_end)), + free_reserved_area(lm_alias(__init_begin), + lm_alias(__init_end), 0, "unused kernel"); /* * Unmap the __init region but leave the VM area in place. This diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 757009daa9ed..201d918e7575 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -26,6 +27,13 @@ static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE); +/* + * The p*d_populate functions call virt_to_phys implicitly so they can't be used + * directly on kernel symbols (bm_p*d). All the early functions are called too + * early to use lm_alias so __p*d_populate functions must be used to populate + * with the physical address from __pa_symbol. + */ + static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end) { @@ -33,12 +41,12 @@ static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long next; if (pmd_none(*pmd)) - pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte); + __pmd_populate(pmd, __pa_symbol(kasan_zero_pte), PMD_TYPE_TABLE); pte = pte_offset_kimg(pmd, addr); do { next = addr + PAGE_SIZE; - set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page), + set_pte(pte, pfn_pte(sym_to_pfn(kasan_zero_page), PAGE_KERNEL)); } while (pte++, addr = next, addr != end && pte_none(*pte)); } @@ -51,7 +59,7 @@ static void __init kasan_early_pmd_populate(pud_t *pud, unsigned long next; if (pud_none(*pud)) - pud_populate(&init_mm, pud, kasan_zero_pmd); + __pud_populate(pud, __pa_symbol(kasan_zero_pmd), PMD_TYPE_TABLE); pmd = pmd_offset_kimg(pud, addr); do { @@ -68,7 +76,7 @@ static void __init kasan_early_pud_populate(pgd_t *pgd, unsigned long next; if (pgd_none(*pgd)) - pgd_populate(&init_mm, pgd, kasan_zero_pud); + __pgd_populate(pgd, __pa_symbol(kasan_zero_pud), PUD_TYPE_TABLE); pud = pud_offset_kimg(pgd, addr); do { @@ -148,7 +156,7 @@ void __init kasan_init(void) */ memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir)); dsb(ishst); - cpu_replace_ttbr1(tmp_pg_dir); + cpu_replace_ttbr1(lm_alias(tmp_pg_dir)); clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); @@ -199,10 +207,10 @@ void __init kasan_init(void) */ for (i = 0; i < PTRS_PER_PTE; i++) set_pte(&kasan_zero_pte[i], - pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO)); + pfn_pte(sym_to_pfn(kasan_zero_page), PAGE_KERNEL_RO)); memset(kasan_zero_page, 0, PAGE_SIZE); - cpu_replace_ttbr1(swapper_pg_dir); + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); /* At this point kasan is fully initialized. Enable error messages */ init_task.kasan_depth = 0; diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 17243e43184e..a43415784e12 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -359,8 +360,8 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end) { - unsigned long kernel_start = __pa(_text); - unsigned long kernel_end = __pa(__init_begin); + unsigned long kernel_start = __pa_symbol(_text); + unsigned long kernel_end = __pa_symbol(__init_begin); /* * Take care not to create a writable alias for the @@ -427,14 +428,14 @@ void mark_rodata_ro(void) unsigned long section_size; section_size = (unsigned long)_etext - (unsigned long)_text; - create_mapping_late(__pa(_text), (unsigned long)_text, + create_mapping_late(__pa_symbol(_text), (unsigned long)_text, section_size, PAGE_KERNEL_ROX); /* * mark .rodata as read only. Use __init_begin rather than __end_rodata * to cover NOTES and EXCEPTION_TABLE. */ section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata; - create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata, + create_mapping_late(__pa_symbol(__start_rodata), (unsigned long)__start_rodata, section_size, PAGE_KERNEL_RO); /* flush the TLBs after updating live kernel mappings */ @@ -446,7 +447,7 @@ void mark_rodata_ro(void) static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end, pgprot_t prot, struct vm_struct *vma) { - phys_addr_t pa_start = __pa(va_start); + phys_addr_t pa_start = __pa_symbol(va_start); unsigned long size = va_end - va_start; BUG_ON(!PAGE_ALIGNED(pa_start)); @@ -494,7 +495,7 @@ static void __init map_kernel(pgd_t *pgd) */ BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START), - __pud(__pa(bm_pmd) | PUD_TYPE_TABLE)); + __pud(__pa_symbol(bm_pmd) | PUD_TYPE_TABLE)); pud_clear_fixmap(); } else { BUG(); @@ -525,7 +526,7 @@ void __init paging_init(void) */ cpu_replace_ttbr1(__va(pgd_phys)); memcpy(swapper_pg_dir, pgd, PAGE_SIZE); - cpu_replace_ttbr1(swapper_pg_dir); + cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); pgd_clear_fixmap(); memblock_free(pgd_phys, PAGE_SIZE); @@ -534,7 +535,7 @@ void __init paging_init(void) * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd * allocated with it. */ - memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE, + memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE, SWAPPER_DIR_SIZE - PAGE_SIZE); } @@ -645,6 +646,12 @@ static inline pte_t * fixmap_pte(unsigned long addr) return &bm_pte[pte_index(addr)]; } +/* + * The p*d_populate functions call virt_to_phys implicitly so they can't be used + * directly on kernel symbols (bm_p*d). This function is called too early to use + * lm_alias so __p*d_populate functions must be used to populate with the + * physical address from __pa_symbol. + */ void __init early_fixmap_init(void) { pgd_t *pgd; @@ -654,7 +661,7 @@ void __init early_fixmap_init(void) pgd = pgd_offset_k(addr); if (CONFIG_PGTABLE_LEVELS > 3 && - !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa(bm_pud))) { + !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa_symbol(bm_pud))) { /* * We only end up here if the kernel mapping and the fixmap * share the top level pgd entry, which should only happen on @@ -663,12 +670,14 @@ void __init early_fixmap_init(void) BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); pud = pud_offset_kimg(pgd, addr); } else { - pgd_populate(&init_mm, pgd, bm_pud); + if (pgd_none(*pgd)) + __pgd_populate(pgd, __pa_symbol(bm_pud), PUD_TYPE_TABLE); pud = fixmap_pud(addr); } - pud_populate(&init_mm, pud, bm_pmd); + if (pud_none(*pud)) + __pud_populate(pud, __pa_symbol(bm_pmd), PMD_TYPE_TABLE); pmd = fixmap_pmd(addr); - pmd_populate_kernel(&init_mm, pmd, bm_pte); + __pmd_populate(pmd, __pa_symbol(bm_pte), PMD_TYPE_TABLE); /* * The boot-ioremap range spans multiple pmds, for which From ec6d06efb0bac6cd92846e42d1afe5b98b57e7c2 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:50 -0800 Subject: [PATCH 26/74] arm64: Add support for CONFIG_DEBUG_VIRTUAL x86 has an option CONFIG_DEBUG_VIRTUAL to do additional checks on virt_to_phys calls. The goal is to catch users who are calling virt_to_phys on non-linear addresses immediately. This inclues callers using virt_to_phys on image addresses instead of __pa_symbol. As features such as CONFIG_VMAP_STACK get enabled for arm64, this becomes increasingly important. Add checks to catch bad virt_to_phys usage. Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/memory.h | 31 ++++++++++++++++++++++++++++--- arch/arm64/mm/Makefile | 2 ++ arch/arm64/mm/physaddr.c | 30 ++++++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 arch/arm64/mm/physaddr.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 111742126897..359bca239ba7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -6,6 +6,7 @@ config ARM64 select ACPI_MCFG if ACPI select ACPI_SPCR_TABLE if ACPI select ARCH_CLOCKSOURCE_DATA + select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ELF_RANDOMIZE diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 0ff237a75c05..7011f0857629 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -172,10 +172,33 @@ static inline unsigned long kaslr_offset(void) * private definitions which should NOT be used outside memory.h * files. Use virt_to_phys/phys_to_virt/__pa/__va instead. */ -#define __virt_to_phys(x) ({ \ + + +/* + * The linear kernel range starts in the middle of the virtual adddress + * space. Testing the top bit for the start of the region is a + * sufficient check. + */ +#define __is_lm_address(addr) (!!((addr) & BIT(VA_BITS - 1))) + +#define __lm_to_phys(addr) (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET) +#define __kimg_to_phys(addr) ((addr) - kimage_voffset) + +#define __virt_to_phys_nodebug(x) ({ \ phys_addr_t __x = (phys_addr_t)(x); \ - __x & BIT(VA_BITS - 1) ? (__x & ~PAGE_OFFSET) + PHYS_OFFSET : \ - (__x - kimage_voffset); }) + __is_lm_address(__x) ? __lm_to_phys(__x) : \ + __kimg_to_phys(__x); \ +}) + +#define __pa_symbol_nodebug(x) __kimg_to_phys((phys_addr_t)(x)) + +#ifdef CONFIG_DEBUG_VIRTUAL +extern phys_addr_t __virt_to_phys(unsigned long x); +extern phys_addr_t __phys_addr_symbol(unsigned long x); +#else +#define __virt_to_phys(x) __virt_to_phys_nodebug(x) +#define __phys_addr_symbol(x) __pa_symbol_nodebug(x) +#endif #define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET) #define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset)) @@ -207,6 +230,8 @@ static inline void *phys_to_virt(phys_addr_t x) * Drivers should NOT use these either. */ #define __pa(x) __virt_to_phys((unsigned long)(x)) +#define __pa_symbol(x) __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0)) +#define __pa_nodebug(x) __virt_to_phys_nodebug((unsigned long)(x)) #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) #define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys((unsigned long)(x))) diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index e703fb9defad..9b0ba191e48e 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -6,6 +6,8 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_ARM64_PTDUMP_CORE) += dump.o obj-$(CONFIG_ARM64_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_NUMA) += numa.o +obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o +KASAN_SANITIZE_physaddr.o += n obj-$(CONFIG_KASAN) += kasan_init.o KASAN_SANITIZE_kasan_init.o := n diff --git a/arch/arm64/mm/physaddr.c b/arch/arm64/mm/physaddr.c new file mode 100644 index 000000000000..91371daf397c --- /dev/null +++ b/arch/arm64/mm/physaddr.c @@ -0,0 +1,30 @@ +#include +#include +#include +#include +#include + +#include + +phys_addr_t __virt_to_phys(unsigned long x) +{ + WARN(!__is_lm_address(x), + "virt_to_phys used for non-linear address: %pK (%pS)\n", + (void *)x, + (void *)x); + + return __virt_to_phys_nodebug(x); +} +EXPORT_SYMBOL(__virt_to_phys); + +phys_addr_t __phys_addr_symbol(unsigned long x) +{ + /* + * This is bounds checking against the kernel image only. + * __pa_symbol should only be used on kernel symbol addresses. + */ + VIRTUAL_BUG_ON(x < (unsigned long) KERNEL_START || + x > (unsigned long) KERNEL_END); + return __pa_symbol_nodebug(x); +} +EXPORT_SYMBOL(__phys_addr_symbol); From 7f332fc1f0f053799aeda587cefbc67b5ecd72c5 Mon Sep 17 00:00:00 2001 From: Takeshi Kihara Date: Wed, 11 Jan 2017 11:11:17 +0100 Subject: [PATCH 27/74] arm64: Add support for DMA_ATTR_SKIP_CPU_SYNC attribute to swiotlb This patch adds support for DMA_ATTR_SKIP_CPU_SYNC attribute for dma_{un}map_{page,sg} functions family to swiotlb. DMA_ATTR_SKIP_CPU_SYNC allows platform code to skip synchronization of the CPU cache for the given buffer assuming that it has been already transferred to 'device' domain. Ported from IOMMU .{un}map_{sg,page} ops. Acked-by: Laurent Pinchart Reviewed-by: Robin Murphy Signed-off-by: Takeshi Kihara Signed-off-by: Geert Uytterhoeven Signed-off-by: Will Deacon --- arch/arm64/mm/dma-mapping.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index e04082700bb1..1d7d5d2881db 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -211,7 +211,8 @@ static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page, dma_addr_t dev_addr; dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs); - if (!is_device_dma_coherent(dev)) + if (!is_device_dma_coherent(dev) && + (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); return dev_addr; @@ -222,7 +223,8 @@ static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - if (!is_device_dma_coherent(dev)) + if (!is_device_dma_coherent(dev) && + (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); swiotlb_unmap_page(dev, dev_addr, size, dir, attrs); } @@ -235,7 +237,8 @@ static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int i, ret; ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs); - if (!is_device_dma_coherent(dev)) + if (!is_device_dma_coherent(dev) && + (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) for_each_sg(sgl, sg, ret, i) __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), sg->length, dir); @@ -251,7 +254,8 @@ static void __swiotlb_unmap_sg_attrs(struct device *dev, struct scatterlist *sg; int i; - if (!is_device_dma_coherent(dev)) + if (!is_device_dma_coherent(dev) && + (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) for_each_sg(sgl, sg, nelems, i) __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), sg->length, dir); From f92f5ce01ee6a6a86cbfc4e3b0d18529c302b1ea Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 12 Jan 2017 16:37:28 +0000 Subject: [PATCH 28/74] arm64: Advertise support for Rounding double multiply instructions ARM v8.1 extensions include support for rounding double multiply add/subtract instructions to the A64 SIMD instructions set. Let the userspace know about it via a HWCAP bit. Cc: Mark Rutland Cc: Will Deacon Acked-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/cpufeature.c | 1 + arch/arm64/kernel/cpuinfo.c | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 773c90b05458..61c263cba272 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -31,5 +31,6 @@ #define HWCAP_FPHP (1 << 9) #define HWCAP_ASIMDHP (1 << 10) #define HWCAP_CPUID (1 << 11) +#define HWCAP_ASIMDRDM (1 << 12) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 1661750dabfb..a995aae59056 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -881,6 +881,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDRDM), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 4d44edd2b140..5b22c687f02a 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -64,6 +64,7 @@ static const char *const hwcap_str[] = { "fphp", "asimdhp", "cpuid", + "asimdrdm", NULL }; From eac8017f0c4719d30f89d1a247de7bfbf6d0da4f Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Fri, 13 Jan 2017 13:59:35 +0800 Subject: [PATCH 29/74] arm64: mm: use phys_addr_t instead of unsigned long in __map_memblock Cosmetic change to use phys_addr_t instead of unsigned long for the return value of __pa_symbol(). Acked-by: Catalin Marinas Acked-by: Mark Rutland Signed-off-by: Miles Chen Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index a43415784e12..2131521ddc24 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -360,8 +360,8 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt, static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end) { - unsigned long kernel_start = __pa_symbol(_text); - unsigned long kernel_end = __pa_symbol(__init_begin); + phys_addr_t kernel_start = __pa_symbol(_text); + phys_addr_t kernel_end = __pa_symbol(__init_begin); /* * Take care not to create a writable alias for the From fa5ce3d1928c441c3d241c34a00c07c8f5880b1a Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 13 Jan 2017 14:12:09 +0100 Subject: [PATCH 30/74] arm64: errata: Provide macro for major and minor cpu revisions Definition of cpu ranges are hard to read if the cpu variant is not zero. Provide MIDR_CPU_VAR_REV() macro to describe the full hardware revision of a cpu including variant and (minor) revision. Signed-off-by: Robert Richter Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 3 +++ arch/arm64/kernel/cpu_errata.c | 15 +++++++++------ arch/arm64/kernel/cpufeature.c | 8 +++----- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 26a68ddb11c1..5196f0afaabd 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -56,6 +56,9 @@ (0xf << MIDR_ARCHITECTURE_SHIFT) | \ ((partnum) << MIDR_PARTNUM_SHIFT)) +#define MIDR_CPU_VAR_REV(var, rev) \ + (((var) << MIDR_VARIANT_SHIFT) | (rev)) + #define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \ MIDR_ARCHITECTURE_MASK) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b75e917aac46..722284eaf51e 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -79,8 +79,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A57 r0p0 - r1p2 */ .desc = "ARM erratum 832075", .capability = ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE, - MIDR_RANGE(MIDR_CORTEX_A57, 0x00, - (1 << MIDR_VARIANT_SHIFT) | 2), + MIDR_RANGE(MIDR_CORTEX_A57, + MIDR_CPU_VAR_REV(0, 0), + MIDR_CPU_VAR_REV(1, 2)), }, #endif #ifdef CONFIG_ARM64_ERRATUM_834220 @@ -88,8 +89,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A57 r0p0 - r1p2 */ .desc = "ARM erratum 834220", .capability = ARM64_WORKAROUND_834220, - MIDR_RANGE(MIDR_CORTEX_A57, 0x00, - (1 << MIDR_VARIANT_SHIFT) | 2), + MIDR_RANGE(MIDR_CORTEX_A57, + MIDR_CPU_VAR_REV(0, 0), + MIDR_CPU_VAR_REV(1, 2)), }, #endif #ifdef CONFIG_ARM64_ERRATUM_845719 @@ -113,8 +115,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cavium ThunderX, T88 pass 1.x - 2.1 */ .desc = "Cavium erratum 27456", .capability = ARM64_WORKAROUND_CAVIUM_27456, - MIDR_RANGE(MIDR_THUNDERX, 0x00, - (1 << MIDR_VARIANT_SHIFT) | 1), + MIDR_RANGE(MIDR_THUNDERX, + MIDR_CPU_VAR_REV(0, 0), + MIDR_CPU_VAR_REV(1, 1)), }, { /* Cavium ThunderX, T81 pass 1.0 */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a995aae59056..ed675061dda0 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -730,13 +730,11 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry, static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int __unused) { u32 midr = read_cpuid_id(); - u32 rv_min, rv_max; /* Cavium ThunderX pass 1.x and 2.x */ - rv_min = 0; - rv_max = (1 << MIDR_VARIANT_SHIFT) | MIDR_REVISION_MASK; - - return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max); + return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, + MIDR_CPU_VAR_REV(0, 0), + MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK)); } static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) From 5fa23530d4fcc7e84be9a557c58d0e670a15c042 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 16 Jan 2017 10:40:43 +0000 Subject: [PATCH 31/74] of: base: add support to find the level of the last cache It is useful to have helper function just to get the number of cache levels for a given logical cpu. We can obtain the same by just checking the level at which the last cache is present. This patch adds support to find the level of the last cache for a given cpu. It will be used on ARM64 platform where the device tree provides the information for the additional non-architected/transparent/external last level caches that are not integrated with the processors. Cc: Mark Rutland Suggested-by: Rob Herring Acked-by: Rob Herring Tested-by: Tan Xiaojun Signed-off-by: Sudeep Holla [will: use u32 instead of int for cache_level] Signed-off-by: Will Deacon --- drivers/of/base.c | 26 ++++++++++++++++++++++++++ include/linux/of.h | 1 + 2 files changed, 27 insertions(+) diff --git a/drivers/of/base.c b/drivers/of/base.c index d4bea3c797d6..a641b1faf057 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -2267,6 +2268,31 @@ struct device_node *of_find_next_cache_node(const struct device_node *np) return NULL; } +/** + * of_find_last_cache_level - Find the level at which the last cache is + * present for the given logical cpu + * + * @cpu: cpu number(logical index) for which the last cache level is needed + * + * Returns the the level at which the last cache is present. It is exactly + * same as the total number of cache levels for the given logical cpu. + */ +int of_find_last_cache_level(unsigned int cpu) +{ + u32 cache_level = 0; + struct device_node *prev = NULL, *np = of_cpu_device_node_get(cpu); + + while (np) { + prev = np; + of_node_put(np); + np = of_find_next_cache_node(np); + } + + of_property_read_u32(prev, "cache-level", &cache_level); + + return cache_level; +} + /** * of_graph_parse_endpoint() - parse common endpoint node properties * @node: pointer to endpoint device_node diff --git a/include/linux/of.h b/include/linux/of.h index d72f01009297..21e6323de0f3 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -280,6 +280,7 @@ extern struct device_node *of_get_child_by_name(const struct device_node *node, /* cache lookup */ extern struct device_node *of_find_next_cache_node(const struct device_node *); +extern int of_find_last_cache_level(unsigned int cpu); extern struct device_node *of_find_node_with_property( struct device_node *from, const char *prop_name); From 9a802431c527f0ef860399f066a9793794cac17b Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 16 Jan 2017 10:40:44 +0000 Subject: [PATCH 32/74] arm64: cacheinfo: add support to override cache levels via device tree The cache hierarchy can be identified through Cache Level ID(CLIDR) architected system register. However in some cases it will provide only the number of cache levels that are integrated into the processor itself. In other words, it can't provide any information about the caches that are external and/or transparent. Some platforms require to export the information about all such external caches to the userspace applications via the sysfs interface. This patch adds support to override the cache levels using device tree to take such external non-architected caches into account. Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Tested-by: Tan Xiaojun Signed-off-by: Sudeep Holla Signed-off-by: Will Deacon --- arch/arm64/kernel/cacheinfo.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 9617301f76b5..3f2250fc391b 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -84,7 +84,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, static int __init_cache_level(unsigned int cpu) { - unsigned int ctype, level, leaves; + unsigned int ctype, level, leaves, of_level; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) { @@ -97,6 +97,17 @@ static int __init_cache_level(unsigned int cpu) leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1; } + of_level = of_find_last_cache_level(cpu); + if (level < of_level) { + /* + * some external caches not specified in CLIDR_EL1 + * the information may be available in the device tree + * only unified external caches are considered here + */ + leaves += (of_level - level); + level = of_level; + } + this_cpu_ci->num_levels = level; this_cpu_ci->num_leaves = leaves; return 0; From 9bb003600ed9aabfe33b7330fa7e93acf959e8df Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 17 Jan 2017 16:10:56 +0000 Subject: [PATCH 33/74] arm64: head.S: avoid open-coded adr_l Some places in the kernel open-code sequences using ADRP for a symbol another instruction using a :lo12: relocation for that same symbol. These sequences are easy to get wrong, and more painful to read than is necessary. For these reasons, it is preferable to use the {adr,ldr,str}_l macros for these cases. This patch makes use of adr_l these in head.S, removing an open-coded sequence using adrp. Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Cc: Catalin Marinas Cc: Marc Zyngier Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 9b0857a89a2a..d3b8f92de5c5 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -613,8 +613,7 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems install_el2_stub: /* Hypervisor stub */ - adrp x0, __hyp_stub_vectors - add x0, x0, #:lo12:__hyp_stub_vectors + adr_l x0, __hyp_stub_vectors msr vbar_el2, x0 /* spsr */ From 526d10ae022c36722ce5b2db22b02f9353281875 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 17 Jan 2017 16:10:57 +0000 Subject: [PATCH 34/74] arm64: efi-entry.S: avoid open-coded adr_l Some places in the kernel open-code sequences using ADRP for a symbol another instruction using a :lo12: relocation for that same symbol. These sequences are easy to get wrong, and more painful to read than is necessary. For these reasons, it is preferable to use the {adr,ldr,str}_l macros for these cases. This patch makes use of these in efi-entry.S, removing open-coded sequences using adrp. Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Cc: Catalin Marinas Cc: Matt Fleming Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/efi-entry.S | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index e88c064b845c..4e6ad355bd05 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -46,8 +46,7 @@ ENTRY(entry) * efi_system_table_t *sys_table, * unsigned long *image_addr) ; */ - adrp x8, _text - add x8, x8, #:lo12:_text + adr_l x8, _text add x2, sp, 16 str x8, [x2] bl efi_entry @@ -68,10 +67,8 @@ ENTRY(entry) /* * Calculate size of the kernel Image (same for original and copy). */ - adrp x1, _text - add x1, x1, #:lo12:_text - adrp x2, _edata - add x2, x2, #:lo12:_edata + adr_l x1, _text + adr_l x2, _edata sub x1, x2, x1 /* From 829d2bd1339220b742e5e61a5f1975eb6f74cad3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 17 Jan 2017 16:10:58 +0000 Subject: [PATCH 35/74] arm64: entry-ftrace.S: avoid open-coded {adr,ldr}_l Some places in the kernel open-code sequences using ADRP for a symbol another instruction using a :lo12: relocation for that same symbol. These sequences are easy to get wrong, and more painful to read than is necessary. For these reasons, it is preferable to use the {adr,ldr,str}_l macros for these cases. This patch makes use of these in entry-ftrace.S, removing open-coded sequences using adrp. This results in a minor code change, since a temporary register is not used when generating the address for some symbols, but this is fine, as the value of the temporary register is not used elsewhere. Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Cc: AKASHI Takahiro Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-ftrace.S | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index aef02d2af3b5..8f1905b795f3 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -98,8 +98,7 @@ ENTRY(_mcount) mcount_enter - adrp x0, ftrace_trace_function - ldr x2, [x0, #:lo12:ftrace_trace_function] + ldr_l x2, ftrace_trace_function adr x0, ftrace_stub cmp x0, x2 // if (ftrace_trace_function b.eq skip_ftrace_call // != ftrace_stub) { @@ -115,15 +114,12 @@ skip_ftrace_call: // return; mcount_exit // return; // } skip_ftrace_call: - adrp x1, ftrace_graph_return - ldr x2, [x1, #:lo12:ftrace_graph_return] + ldr_l x2, ftrace_graph_return cmp x0, x2 // if ((ftrace_graph_return b.ne ftrace_graph_caller // != ftrace_stub) - adrp x1, ftrace_graph_entry // || (ftrace_graph_entry - adrp x0, ftrace_graph_entry_stub // != ftrace_graph_entry_stub)) - ldr x2, [x1, #:lo12:ftrace_graph_entry] - add x0, x0, #:lo12:ftrace_graph_entry_stub + ldr_l x2, ftrace_graph_entry // || (ftrace_graph_entry + adr_l x0, ftrace_graph_entry_stub // != ftrace_graph_entry_stub)) cmp x0, x2 b.ne ftrace_graph_caller // ftrace_graph_caller(); From 4a8d8a14c0d08c2437cb80c05e88f6cc1ca3fb2c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jan 2017 10:49:12 +0000 Subject: [PATCH 36/74] arm64: dma-mapping: Only swizzle DMA ops for IOMMU_DOMAIN_DMA The arm64 DMA-mapping implementation sets the DMA ops to the IOMMU DMA ops if we detect that an IOMMU is present for the master and the DMA ranges are valid. In the case when the IOMMU domain for the device is not of type IOMMU_DOMAIN_DMA, then we have no business swizzling the ops, since we're not in control of the underlying address space. This patch leaves the DMA ops alone for masters attached to non-DMA IOMMU domains. Reviewed-by: Robin Murphy Signed-off-by: Will Deacon --- arch/arm64/mm/dma-mapping.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 1d7d5d2881db..0ec1ae933342 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -835,14 +835,21 @@ static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops, * then the IOMMU core will have already configured a group for this * device, and allocated the default domain for that group. */ - if (!domain || iommu_dma_init_domain(domain, dma_base, size, dev)) { - pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", - dev_name(dev)); - return false; + if (!domain) + goto out_err; + + if (domain->type == IOMMU_DOMAIN_DMA) { + if (iommu_dma_init_domain(domain, dma_base, size, dev)) + goto out_err; + + dev->archdata.dma_ops = &iommu_dma_ops; } - dev->archdata.dma_ops = &iommu_dma_ops; return true; +out_err: + pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", + dev_name(dev)); + return false; } static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops, From cbb999dd0b452991f4f698142aa7ffe566c0b415 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 24 Jan 2017 12:43:40 +0100 Subject: [PATCH 37/74] arm64: Use __pa_symbol for empty_zero_page If CONFIG_DEBUG_VIRTUAL=y and CONFIG_ARM64_SW_TTBR0_PAN=y: virt_to_phys used for non-linear address: ffffff8008cc0000 (empty_zero_page+0x0/0x1000) WARNING: CPU: 0 PID: 0 at arch/arm64/mm/physaddr.c:14 __virt_to_phys+0x28/0x60 ... [] __virt_to_phys+0x28/0x60 [] setup_arch+0x46c/0x4d4 Fixes: 2077be6783b5936c ("arm64: Use __pa_symbol for kernel symbols") Acked-by: Mark Rutland Acked-by: Laura Abbott Signed-off-by: Geert Uytterhoeven Signed-off-by: Will Deacon --- arch/arm64/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 669fc9ff728b..b5222094ab52 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -298,7 +298,7 @@ void __init setup_arch(char **cmdline_p) * faults in case uaccess_enable() is inadvertently called by the init * thread. */ - init_task.thread_info.ttbr0 = virt_to_phys(empty_zero_page); + init_task.thread_info.ttbr0 = __pa_symbol(empty_zero_page); #endif #ifdef CONFIG_VT From 1c33dc1015be482ca1a248981698d75369ce8cdc Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 24 Jan 2017 16:30:19 +0100 Subject: [PATCH 38/74] drivers: firmware: psci: Use __pa_symbol for cpu_resume If CONFIG_DEBUG_VIRTUAL=y, during s2ram: virt_to_phys used for non-linear address: ffffff80085db280 (cpu_resume+0x0/0x20) ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1628 at arch/arm64/mm/physaddr.c:14 __virt_to_phys+0x28/0x60 ... [] __virt_to_phys+0x28/0x60 [] psci_system_suspend+0x20/0x44 [] cpu_suspend+0x3c/0x68 [] psci_system_suspend_enter+0x18/0x20 [] suspend_devices_and_enter+0x3f8/0x7e8 [] pm_suspend+0x544/0x5f4 Fixes: 1a08e3d9e0ac4577 ("drivers: firmware: psci: Use __pa_symbol for kernel symbol") Acked-by: Mark Rutland Acked-by: Laura Abbott Signed-off-by: Geert Uytterhoeven Signed-off-by: Will Deacon --- drivers/firmware/psci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c index 66a8793f3b37..493a56a4cfc4 100644 --- a/drivers/firmware/psci.c +++ b/drivers/firmware/psci.c @@ -419,7 +419,7 @@ CPUIDLE_METHOD_OF_DECLARE(psci, "psci", &psci_cpuidle_ops); static int psci_system_suspend(unsigned long unused) { return invoke_psci_fn(PSCI_FN_NATIVE(1_0, SYSTEM_SUSPEND), - virt_to_phys(cpu_resume), 0, 0); + __pa_symbol(cpu_resume), 0, 0); } static int psci_system_suspend_enter(suspend_state_t state) From 79ba11d24b28ead94b976c4d7c8bf8e6c349eb36 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 24 Jan 2017 17:11:40 +0000 Subject: [PATCH 39/74] arm64: kernel: do not mark reserved memory regions as IORESOURCE_BUSY Memory regions marked as NOMAP should not be used for general allocation by the kernel, and should not even be covered by the linear mapping (hence the name). However, drivers or other subsystems (such as ACPI) that access the firmware directly may legally access them, which means it is also reasonable for such drivers to claim them by invoking request_resource(). Currently, this is prevented by the fact that arm64's request_standard_resources() marks reserved regions as IORESOURCE_BUSY. So drop the IORESOURCE_BUSY flag from these requests. Reported-by: Hanjun Guo Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index b5222094ab52..952e2c0dabd5 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -209,7 +209,7 @@ static void __init request_standard_resources(void) res = alloc_bootmem_low(sizeof(*res)); if (memblock_is_nomap(region)) { res->name = "reserved"; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_MEM; } else { res->name = "System RAM"; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; From 2e449048a25eb75d48dff12882b93f26d130a1c6 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 26 Jan 2017 11:19:55 +0800 Subject: [PATCH 40/74] arm64: Kconfig: select COMPAT_BINFMT_ELF only when BINFMT_ELF is set Fix warning: "(COMPAT) selects COMPAT_BINFMT_ELF which has unmet direct dependencies (COMPAT && BINFMT_ELF)" Signed-off-by: Kefeng Wang Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 359bca239ba7..bac0d1bb58b5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1011,7 +1011,7 @@ source "fs/Kconfig.binfmt" config COMPAT bool "Kernel support for 32-bit EL0" depends on ARM64_4K_PAGES || EXPERT - select COMPAT_BINFMT_ELF + select COMPAT_BINFMT_ELF if BINFMT_ELF select HAVE_UID16 select OLD_SIGSUSPEND3 select COMPAT_OLD_SIGACTION From adbe7e26f4257f72817495b9bce114284060b0d7 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 25 Jan 2017 18:31:31 +0000 Subject: [PATCH 41/74] arm64: dma-mapping: Fix dma_mapping_error() when bypassing SWIOTLB When bypassing SWIOTLB on small-memory systems, we need to avoid calling into swiotlb_dma_mapping_error() in exactly the same way as we avoid swiotlb_dma_supported(), because the former also relies on SWIOTLB state being initialised. Under the assumptions for which we skip SWIOTLB, dma_map_{single,page}() will only ever return the DMA-offset-adjusted physical address of the page passed in, thus we can report success unconditionally. Fixes: b67a8b29df7e ("arm64: mm: only initialize swiotlb when necessary") CC: stable@vger.kernel.org CC: Jisheng Zhang Reported-by: Aaro Koskinen Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- arch/arm64/mm/dma-mapping.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 0ec1ae933342..7ce2b78a72b9 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -356,6 +356,13 @@ static int __swiotlb_dma_supported(struct device *hwdev, u64 mask) return 1; } +static int __swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t addr) +{ + if (swiotlb) + return swiotlb_dma_mapping_error(hwdev, addr); + return 0; +} + static struct dma_map_ops swiotlb_dma_ops = { .alloc = __dma_alloc, .free = __dma_free, @@ -370,7 +377,7 @@ static struct dma_map_ops swiotlb_dma_ops = { .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu, .sync_sg_for_device = __swiotlb_sync_sg_for_device, .dma_supported = __swiotlb_dma_supported, - .mapping_error = swiotlb_dma_mapping_error, + .mapping_error = __swiotlb_dma_mapping_error, }; static int __init atomic_pool_init(void) From ea5f9d1a69654dd5bf4d7dc60278b91d354afc64 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Wed, 25 Jan 2017 10:52:29 -0500 Subject: [PATCH 42/74] arm64: Define Falkor v1 CPU Define the MIDR implementer and part number field values for the Qualcomm Datacenter Technologies Falkor processor version 1 in the usual manner. Signed-off-by: Shanker Donthineni Signed-off-by: Christopher Covington Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 5196f0afaabd..fc502713ab37 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -74,6 +74,7 @@ #define ARM_CPU_IMP_APM 0x50 #define ARM_CPU_IMP_CAVIUM 0x43 #define ARM_CPU_IMP_BRCM 0x42 +#define ARM_CPU_IMP_QCOM 0x51 #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -87,10 +88,13 @@ #define BRCM_CPU_PART_VULCAN 0x516 +#define QCOM_CPU_PART_FALKOR_V1 0x800 + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) +#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) #ifndef __ASSEMBLY__ From fa715319c11f7f94ae3d19efb9c2cd69fa933f48 Mon Sep 17 00:00:00 2001 From: Christopher Covington Date: Wed, 25 Jan 2017 10:52:31 -0500 Subject: [PATCH 43/74] arm64: Use __tlbi() macros in KVM code Refactor the KVM code to use the __tlbi macros, which will allow an errata workaround that repeats tlbi dsb sequences to only change one location. This is not intended to change the generated assembly and comparing before and after vmlinux objdump shows no functional changes. Acked-by: Christoffer Dall Signed-off-by: Christopher Covington Signed-off-by: Will Deacon --- arch/arm64/kvm/hyp/tlb.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index 88e2f2b938f0..e8e7ba2bc11f 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -16,6 +16,7 @@ */ #include +#include void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { @@ -32,7 +33,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) * whole of Stage-1. Weep... */ ipa >>= 12; - asm volatile("tlbi ipas2e1is, %0" : : "r" (ipa)); + __tlbi(ipas2e1is, ipa); /* * We have to ensure completion of the invalidation at Stage-2, @@ -41,7 +42,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) * the Stage-1 invalidation happened first. */ dsb(ish); - asm volatile("tlbi vmalle1is" : : ); + __tlbi(vmalle1is); dsb(ish); isb(); @@ -57,7 +58,7 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) write_sysreg(kvm->arch.vttbr, vttbr_el2); isb(); - asm volatile("tlbi vmalls12e1is" : : ); + __tlbi(vmalls12e1is); dsb(ish); isb(); @@ -72,7 +73,7 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) write_sysreg(kvm->arch.vttbr, vttbr_el2); isb(); - asm volatile("tlbi vmalle1" : : ); + __tlbi(vmalle1); dsb(nsh); isb(); @@ -82,7 +83,7 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) void __hyp_text __kvm_flush_vm_context(void) { dsb(ishst); - asm volatile("tlbi alle1is \n" - "ic ialluis ": : ); + __tlbi(alle1is); + asm volatile("ic ialluis" : : ); dsb(ish); } From 49f6cba617fef4bc097a291e0dfd028cc7073c52 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 27 Jan 2017 16:15:38 +0000 Subject: [PATCH 44/74] arm64: handle sys and undef traps consistently If an EL0 instruction in the SYS class triggers an exception, do_sysintr looks for a sys64_hook matching the instruction, and if none is found, injects a SIGILL. This mirrors what we do for undefined instruction encodings in do_undefinstr, where we look for an undef_hook matching the instruction, and if none is found, inject a SIGILL. Over time, new SYS instruction encodings may be allocated. Prior to allocation, exceptions resulting from these would be handled by do_undefinstr, whereas after allocation these may be handled by do_sysintr. To ensure that we have consistent behaviour if and when this happens, it would be beneficial to have do_sysinstr fall back to do_undefinstr. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Reviewed-by: Suzuki Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8187229eb802..7c3fc0634aa2 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -531,7 +531,12 @@ asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) return; } - force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); + /* + * New SYS instructions may previously have been undefined at EL0. Fall + * back to our usual undefined instruction handler so that we handle + * these consistently. + */ + do_undefinstr(regs); } long compat_arm_syscall(struct pt_regs *regs); From ec663d967b2276448a416406ca59ff247c0c80c5 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 27 Jan 2017 10:54:12 +0000 Subject: [PATCH 45/74] arm64: Improve detection of user/non-user mappings in set_pte(_at) Commit cab15ce604e5 ("arm64: Introduce execute-only page access permissions") allowed a valid user PTE to have the PTE_USER bit clear. As a consequence, the pte_valid_not_user() macro in set_pte() was replaced with pte_valid_global() under the assumption that only user pages have the nG bit set. EFI mappings, however, also have the nG bit set and set_pte() wrongly ignores issuing the DSB+ISB. This patch reinstates the pte_valid_not_user() macro and adds the PTE_UXN bit check since all kernel mappings have this bit set. For clarity, pte_exec() is renamed to pte_user_exec() as it only checks for the absence of PTE_UXN. Consequently, the user executable check in set_pte_at() drops the pte_ng() test since pte_user_exec() is sufficient. Fixes: cab15ce604e5 ("arm64: Introduce execute-only page access permissions") Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 090134c346db..0eef6064bf3b 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -71,9 +71,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) -#define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) +#define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) -#define pte_ng(pte) (!!(pte_val(pte) & PTE_NG)) #ifdef CONFIG_ARM64_HW_AFDBM #define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) @@ -84,8 +83,12 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) -#define pte_valid_global(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_NG)) == PTE_VALID) +/* + * Execute-only user mappings do not have the PTE_USER bit set. All valid + * kernel mappings have the PTE_UXN bit set. + */ +#define pte_valid_not_user(pte) \ + ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN)) #define pte_valid_young(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) @@ -178,7 +181,7 @@ static inline void set_pte(pte_t *ptep, pte_t pte) * Only if the new pte is valid and kernel, otherwise TLB maintenance * or update_mmu_cache() have the necessary barriers. */ - if (pte_valid_global(pte)) { + if (pte_valid_not_user(pte)) { dsb(ishst); isb(); } @@ -212,7 +215,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_val(pte) &= ~PTE_RDONLY; else pte_val(pte) |= PTE_RDONLY; - if (pte_ng(pte) && pte_exec(pte) && !pte_special(pte)) + if (pte_user_exec(pte) && !pte_special(pte)) __sync_icache_dcache(pte, addr); } From d9ff80f83ecbf4cbdf56d32d01c312498e4fb1cd Mon Sep 17 00:00:00 2001 From: Christopher Covington Date: Tue, 31 Jan 2017 12:50:19 -0500 Subject: [PATCH 46/74] arm64: Work around Falkor erratum 1009 During a TLB invalidate sequence targeting the inner shareable domain, Falkor may prematurely complete the DSB before all loads and stores using the old translation are observed. Instruction fetches are not subject to the conditions of this erratum. If the original code sequence includes multiple TLB invalidate instructions followed by a single DSB, onle one of the TLB instructions needs to be repeated to work around this erratum. While the erratum only applies to cases in which the TLBI specifies the inner-shareable domain (*IS form of TLBI) and the DSB is ISH form or stronger (OSH, SYS), this changes applies the workaround overabundantly-- to local TLBI, DSB NSH sequences as well--for simplicity. Based on work by Shanker Donthineni Signed-off-by: Christopher Covington Acked-by: Mark Rutland Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.txt | 1 + arch/arm64/Kconfig | 10 ++++++++++ arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/include/asm/tlbflush.h | 18 +++++++++++++++--- arch/arm64/kernel/cpu_errata.c | 9 +++++++++ 5 files changed, 37 insertions(+), 4 deletions(-) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 405da11fc3e4..0006a94c2321 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -63,3 +63,4 @@ stable kernels. | Cavium | ThunderX SMMUv2 | #27704 | N/A | | | | | | | Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | +| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009| diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index bac0d1bb58b5..0ce23130cc9b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -480,6 +480,16 @@ config CAVIUM_ERRATUM_27456 If unsure, say Y. +config QCOM_FALKOR_ERRATUM_1009 + bool "Falkor E1009: Prematurely complete a DSB after a TLBI" + default y + help + On Falkor v1, the CPU may prematurely complete a DSB following a + TLBI xxIS invalidate maintenance operation. Repeat the TLBI operation + one more time to fix the issue. + + If unsure, say Y. + endmenu diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 4174f09678c4..d1207ac696ac 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -35,7 +35,8 @@ #define ARM64_HYP_OFFSET_LOW 14 #define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 #define ARM64_HAS_NO_FPSIMD 16 +#define ARM64_WORKAROUND_REPEAT_TLBI 17 -#define ARM64_NCAPS 17 +#define ARM64_NCAPS 18 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index deab52374119..af1c76981911 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -36,9 +36,21 @@ * not. The macros handles invoking the asm with or without the * register argument as appropriate. */ -#define __TLBI_0(op, arg) asm ("tlbi " #op) -#define __TLBI_1(op, arg) asm ("tlbi " #op ", %0" : : "r" (arg)) -#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg) +#define __TLBI_0(op, arg) asm ("tlbi " #op "\n" \ + ALTERNATIVE("nop\n nop", \ + "dsb ish\n tlbi " #op, \ + ARM64_WORKAROUND_REPEAT_TLBI, \ + CONFIG_QCOM_FALKOR_ERRATUM_1009) \ + : : ) + +#define __TLBI_1(op, arg) asm ("tlbi " #op ", %0\n" \ + ALTERNATIVE("nop\n nop", \ + "dsb ish\n tlbi " #op ", %0", \ + ARM64_WORKAROUND_REPEAT_TLBI, \ + CONFIG_QCOM_FALKOR_ERRATUM_1009) \ + : : "r" (arg)) + +#define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg) #define __tlbi(op, ...) __TLBI_N(op, ##__VA_ARGS__, 1, 0) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 722284eaf51e..32b9beda2ac8 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -133,6 +133,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .def_scope = SCOPE_LOCAL_CPU, .enable = cpu_enable_trap_ctr_access, }, +#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009 + { + .desc = "Qualcomm Technologies Falkor erratum 1009", + .capability = ARM64_WORKAROUND_REPEAT_TLBI, + MIDR_RANGE(MIDR_QCOM_FALKOR_V1, + MIDR_CPU_VAR_REV(0, 0), + MIDR_CPU_VAR_REV(0, 0)), + }, +#endif { } }; From 3d29a9a0f88300b7ccb642ebee61b331ef0a8c27 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 1 Feb 2017 15:01:05 -0800 Subject: [PATCH 47/74] arm64: make use of for_each_node_by_type() Instead of open-coding the loop, let's use canned macro. Signed-off-by: Dmitry Torokhov Signed-off-by: Will Deacon --- arch/arm64/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index cb87234cfcf2..a8ec5da530af 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -603,9 +603,9 @@ acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header, */ static void __init of_parse_and_init_cpus(void) { - struct device_node *dn = NULL; + struct device_node *dn; - while ((dn = of_find_node_by_type(dn, "cpu"))) { + for_each_node_by_type(dn, "cpu") { u64 hwid = of_get_cpu_mpidr(dn); if (hwid == INVALID_HWID) From f85279b4bd481a1a0697c1d2a8a5f15de216b120 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 22 Sep 2016 11:35:43 +0100 Subject: [PATCH 48/74] arm64: KVM: Save/restore the host SPE state when entering/leaving a VM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SPE buffer is virtually addressed, using the page tables of the CPU MMU. Unusually, this means that the EL0/1 page table may be live whilst we're executing at EL2 on non-VHE configurations. When VHE is in use, we can use the same property to profile the guest behind its back. This patch adds the relevant disabling and flushing code to KVM so that the host can make use of SPE without corrupting guest memory, and any attempts by a guest to use SPE will result in a trap. Acked-by: Marc Zyngier Cc: Alex Bennée Cc: Christoffer Dall Signed-off-by: Will Deacon --- arch/arm64/include/asm/kvm_arm.h | 3 ++ arch/arm64/include/asm/kvm_host.h | 7 +++- arch/arm64/kvm/debug.c | 6 +++ arch/arm64/kvm/hyp/debug-sr.c | 66 ++++++++++++++++++++++++++++++- arch/arm64/kvm/hyp/switch.c | 17 +++++++- 5 files changed, 95 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 2a2752b5b6aa..6e99978e83bd 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -188,6 +188,9 @@ #define CPTR_EL2_DEFAULT 0x000033ff /* Hyp Debug Configuration Register bits */ +#define MDCR_EL2_TPMS (1 << 14) +#define MDCR_EL2_E2PB_MASK (UL(0x3)) +#define MDCR_EL2_E2PB_SHIFT (UL(12)) #define MDCR_EL2_TDRA (1 << 11) #define MDCR_EL2_TDOSA (1 << 10) #define MDCR_EL2_TDA (1 << 9) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e5050388e062..443b387021f2 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -229,7 +229,12 @@ struct kvm_vcpu_arch { /* Pointer to host CPU context */ kvm_cpu_context_t *host_cpu_context; - struct kvm_guest_debug_arch host_debug_state; + struct { + /* {Break,watch}point registers */ + struct kvm_guest_debug_arch regs; + /* Statistical profiling extension */ + u64 pmscr_el1; + } host_debug_state; /* VGIC state */ struct vgic_cpu vgic_cpu; diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 47e5f0feaee8..dbadfaf850a7 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -95,6 +95,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) * - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR) * - Debug ROM Address (MDCR_EL2_TDRA) * - OS related registers (MDCR_EL2_TDOSA) + * - Statistical profiler (MDCR_EL2_TPMS/MDCR_EL2_E2PB) * * Additionally, KVM only traps guest accesses to the debug registers if * the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY @@ -110,8 +111,13 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug); + /* + * This also clears MDCR_EL2_E2PB_MASK to disable guest access + * to the profiling buffer. + */ vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK; vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM | + MDCR_EL2_TPMS | MDCR_EL2_TPMCR | MDCR_EL2_TDRA | MDCR_EL2_TDOSA); diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index 4ba5c9095d03..f5154ed3da6c 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -65,6 +65,66 @@ default: write_debug(ptr[0], reg, 0); \ } +#define PMSCR_EL1 sys_reg(3, 0, 9, 9, 0) + +#define PMBLIMITR_EL1 sys_reg(3, 0, 9, 10, 0) +#define PMBLIMITR_EL1_E BIT(0) + +#define PMBIDR_EL1 sys_reg(3, 0, 9, 10, 7) +#define PMBIDR_EL1_P BIT(4) + +#define psb_csync() asm volatile("hint #17") + +static void __hyp_text __debug_save_spe_vhe(u64 *pmscr_el1) +{ + /* The vcpu can run. but it can't hide. */ +} + +static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1) +{ + u64 reg; + + /* SPE present on this CPU? */ + if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1), + ID_AA64DFR0_PMSVER_SHIFT)) + return; + + /* Yes; is it owned by EL3? */ + reg = read_sysreg_s(PMBIDR_EL1); + if (reg & PMBIDR_EL1_P) + return; + + /* No; is the host actually using the thing? */ + reg = read_sysreg_s(PMBLIMITR_EL1); + if (!(reg & PMBLIMITR_EL1_E)) + return; + + /* Yes; save the control register and disable data generation */ + *pmscr_el1 = read_sysreg_s(PMSCR_EL1); + write_sysreg_s(0, PMSCR_EL1); + isb(); + + /* Now drain all buffered data to memory */ + psb_csync(); + dsb(nsh); +} + +static hyp_alternate_select(__debug_save_spe, + __debug_save_spe_nvhe, __debug_save_spe_vhe, + ARM64_HAS_VIRT_HOST_EXTN); + +static void __hyp_text __debug_restore_spe(u64 pmscr_el1) +{ + if (!pmscr_el1) + return; + + /* The host page table is installed, but not yet synchronised */ + isb(); + + /* Re-enable data generation */ + write_sysreg_s(pmscr_el1, PMSCR_EL1); +} + void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, struct kvm_guest_debug_arch *dbg, struct kvm_cpu_context *ctxt) @@ -118,13 +178,15 @@ void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu) (vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE)) vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; - __debug_save_state(vcpu, &vcpu->arch.host_debug_state, + __debug_save_state(vcpu, &vcpu->arch.host_debug_state.regs, kern_hyp_va(vcpu->arch.host_cpu_context)); + __debug_save_spe()(&vcpu->arch.host_debug_state.pmscr_el1); } void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu) { - __debug_restore_state(vcpu, &vcpu->arch.host_debug_state, + __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); + __debug_restore_state(vcpu, &vcpu->arch.host_debug_state.regs, kern_hyp_va(vcpu->arch.host_cpu_context)); if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 75e83dd40d43..aede1658aeda 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -103,7 +103,13 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) static void __hyp_text __deactivate_traps_vhe(void) { extern char vectors[]; /* kernel exception vectors */ + u64 mdcr_el2 = read_sysreg(mdcr_el2); + mdcr_el2 &= MDCR_EL2_HPMN_MASK | + MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT | + MDCR_EL2_TPMS; + + write_sysreg(mdcr_el2, mdcr_el2); write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); write_sysreg(CPACR_EL1_FPEN, cpacr_el1); write_sysreg(vectors, vbar_el1); @@ -111,6 +117,12 @@ static void __hyp_text __deactivate_traps_vhe(void) static void __hyp_text __deactivate_traps_nvhe(void) { + u64 mdcr_el2 = read_sysreg(mdcr_el2); + + mdcr_el2 &= MDCR_EL2_HPMN_MASK; + mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; + + write_sysreg(mdcr_el2, mdcr_el2); write_sysreg(HCR_RW, hcr_el2); write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); } @@ -132,7 +144,6 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) __deactivate_traps_arch()(); write_sysreg(0, hstr_el2); - write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2); write_sysreg(0, pmuserenr_el0); } @@ -357,6 +368,10 @@ again: } __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); + /* + * This must come after restoring the host sysregs, since a non-VHE + * system may enable SPE here and make use of the TTBRs. + */ __debug_cond_restore_host_state(vcpu); return exit_code; From 7d0928f18bf890d2853281f59aba0dd5a46b34f9 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 2 Feb 2017 17:32:14 +0000 Subject: [PATCH 49/74] arm64: fix erroneous __raw_read_system_reg() cases Since it was introduced in commit da8d02d19ffdd201 ("arm64/capabilities: Make use of system wide safe value"), __raw_read_system_reg() has erroneously mapped some sysreg IDs to other registers. For the fields in ID_ISAR5_EL1, our local feature detection will be erroneous. We may spuriously detect that a feature is uniformly supported, or may fail to detect when it actually is, meaning some compat hwcaps may be erroneous (or not enforced upon hotplug). This patch corrects the erroneous entries. Signed-off-by: Mark Rutland Fixes: da8d02d19ffdd201 ("arm64/capabilities: Make use of system wide safe value") Reported-by: Catalin Marinas Cc: Suzuki K Poulose Cc: Will Deacon Cc: stable@vger.kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index ed675061dda0..0688b9a3adeb 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -664,15 +664,15 @@ static u64 __raw_read_system_reg(u32 sys_id) case SYS_ID_ISAR2_EL1: return read_cpuid(ID_ISAR2_EL1); case SYS_ID_ISAR3_EL1: return read_cpuid(ID_ISAR3_EL1); case SYS_ID_ISAR4_EL1: return read_cpuid(ID_ISAR4_EL1); - case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR4_EL1); + case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR5_EL1); case SYS_MVFR0_EL1: return read_cpuid(MVFR0_EL1); case SYS_MVFR1_EL1: return read_cpuid(MVFR1_EL1); case SYS_MVFR2_EL1: return read_cpuid(MVFR2_EL1); case SYS_ID_AA64PFR0_EL1: return read_cpuid(ID_AA64PFR0_EL1); - case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR0_EL1); + case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR1_EL1); case SYS_ID_AA64DFR0_EL1: return read_cpuid(ID_AA64DFR0_EL1); - case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR0_EL1); + case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR1_EL1); case SYS_ID_AA64MMFR0_EL1: return read_cpuid(ID_AA64MMFR0_EL1); case SYS_ID_AA64MMFR1_EL1: return read_cpuid(ID_AA64MMFR1_EL1); case SYS_ID_AA64MMFR2_EL1: return read_cpuid(ID_AA64MMFR2_EL1); From 965861d66fad6ce07bd5c5ecbe6514bc50b8fc11 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 2 Feb 2017 17:32:15 +0000 Subject: [PATCH 50/74] arm64: ensure __raw_read_system_reg() is self-consistent We recently discovered that __raw_read_system_reg() erroneously mapped sysreg IDs to the wrong registers. To ensure that we don't get hit by a similar issue in future, this patch makes __raw_read_system_reg() use a macro for each case statement, ensuring that each case reads the correct register. To ensure that this patch hasn't introduced an issue, I've binary-diffed the object files before and after this patch. No code or data sections differ (though some debug section differ due to line numbering changing). Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 60 ++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 0688b9a3adeb..1ee5357d0c6a 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -645,6 +645,9 @@ u64 read_system_reg(u32 id) return regp->sys_val; } +#define read_sysreg_case(r) \ + case r: return read_sysreg_s(r) + /* * __raw_read_system_reg() - Used by a STARTING cpu before cpuinfo is populated. * Read the system register on the current CPU @@ -652,36 +655,37 @@ u64 read_system_reg(u32 id) static u64 __raw_read_system_reg(u32 sys_id) { switch (sys_id) { - case SYS_ID_PFR0_EL1: return read_cpuid(ID_PFR0_EL1); - case SYS_ID_PFR1_EL1: return read_cpuid(ID_PFR1_EL1); - case SYS_ID_DFR0_EL1: return read_cpuid(ID_DFR0_EL1); - case SYS_ID_MMFR0_EL1: return read_cpuid(ID_MMFR0_EL1); - case SYS_ID_MMFR1_EL1: return read_cpuid(ID_MMFR1_EL1); - case SYS_ID_MMFR2_EL1: return read_cpuid(ID_MMFR2_EL1); - case SYS_ID_MMFR3_EL1: return read_cpuid(ID_MMFR3_EL1); - case SYS_ID_ISAR0_EL1: return read_cpuid(ID_ISAR0_EL1); - case SYS_ID_ISAR1_EL1: return read_cpuid(ID_ISAR1_EL1); - case SYS_ID_ISAR2_EL1: return read_cpuid(ID_ISAR2_EL1); - case SYS_ID_ISAR3_EL1: return read_cpuid(ID_ISAR3_EL1); - case SYS_ID_ISAR4_EL1: return read_cpuid(ID_ISAR4_EL1); - case SYS_ID_ISAR5_EL1: return read_cpuid(ID_ISAR5_EL1); - case SYS_MVFR0_EL1: return read_cpuid(MVFR0_EL1); - case SYS_MVFR1_EL1: return read_cpuid(MVFR1_EL1); - case SYS_MVFR2_EL1: return read_cpuid(MVFR2_EL1); + read_sysreg_case(SYS_ID_PFR0_EL1); + read_sysreg_case(SYS_ID_PFR1_EL1); + read_sysreg_case(SYS_ID_DFR0_EL1); + read_sysreg_case(SYS_ID_MMFR0_EL1); + read_sysreg_case(SYS_ID_MMFR1_EL1); + read_sysreg_case(SYS_ID_MMFR2_EL1); + read_sysreg_case(SYS_ID_MMFR3_EL1); + read_sysreg_case(SYS_ID_ISAR0_EL1); + read_sysreg_case(SYS_ID_ISAR1_EL1); + read_sysreg_case(SYS_ID_ISAR2_EL1); + read_sysreg_case(SYS_ID_ISAR3_EL1); + read_sysreg_case(SYS_ID_ISAR4_EL1); + read_sysreg_case(SYS_ID_ISAR5_EL1); + read_sysreg_case(SYS_MVFR0_EL1); + read_sysreg_case(SYS_MVFR1_EL1); + read_sysreg_case(SYS_MVFR2_EL1); - case SYS_ID_AA64PFR0_EL1: return read_cpuid(ID_AA64PFR0_EL1); - case SYS_ID_AA64PFR1_EL1: return read_cpuid(ID_AA64PFR1_EL1); - case SYS_ID_AA64DFR0_EL1: return read_cpuid(ID_AA64DFR0_EL1); - case SYS_ID_AA64DFR1_EL1: return read_cpuid(ID_AA64DFR1_EL1); - case SYS_ID_AA64MMFR0_EL1: return read_cpuid(ID_AA64MMFR0_EL1); - case SYS_ID_AA64MMFR1_EL1: return read_cpuid(ID_AA64MMFR1_EL1); - case SYS_ID_AA64MMFR2_EL1: return read_cpuid(ID_AA64MMFR2_EL1); - case SYS_ID_AA64ISAR0_EL1: return read_cpuid(ID_AA64ISAR0_EL1); - case SYS_ID_AA64ISAR1_EL1: return read_cpuid(ID_AA64ISAR1_EL1); + read_sysreg_case(SYS_ID_AA64PFR0_EL1); + read_sysreg_case(SYS_ID_AA64PFR1_EL1); + read_sysreg_case(SYS_ID_AA64DFR0_EL1); + read_sysreg_case(SYS_ID_AA64DFR1_EL1); + read_sysreg_case(SYS_ID_AA64MMFR0_EL1); + read_sysreg_case(SYS_ID_AA64MMFR1_EL1); + read_sysreg_case(SYS_ID_AA64MMFR2_EL1); + read_sysreg_case(SYS_ID_AA64ISAR0_EL1); + read_sysreg_case(SYS_ID_AA64ISAR1_EL1); + + read_sysreg_case(SYS_CNTFRQ_EL0); + read_sysreg_case(SYS_CTR_EL0); + read_sysreg_case(SYS_DCZID_EL0); - case SYS_CNTFRQ_EL0: return read_cpuid(CNTFRQ_EL0); - case SYS_CTR_EL0: return read_cpuid(CTR_EL0); - case SYS_DCZID_EL0: return read_cpuid(DCZID_EL0); default: BUG(); return 0; From 757b435aaabe5e76fc8c85f767061c70a98c0218 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 2 Feb 2017 17:33:19 +0000 Subject: [PATCH 51/74] efi: arm64: Add vmlinux debug link to the Image binary When building with debugging symbols, take the absolute path to the vmlinux binary and add it to the special PE/COFF debug table entry. This allows a debug EFI build to find the vmlinux binary, which is very helpful in debugging, given that the offset where the Image is first loaded by EFI is highly unpredictable. On implementations of UEFI that choose to implement it, this information is exposed via the EFI debug support table, which is a UEFI configuration table that is accessible both by the firmware at boot time and by the OS at runtime, and lists all PE/COFF images loaded by the system. The format of the NB10 Codeview entry is based on the definition used by EDK2, which is our primary reference when it comes to the use of PE/COFF in the context of UEFI firmware. Signed-off-by: Ard Biesheuvel [will: use realpath instead of shell invocation, as discussed on list] Signed-off-by: Will Deacon --- arch/arm64/Kconfig.debug | 8 +++++++ arch/arm64/kernel/Makefile | 4 ++++ arch/arm64/kernel/head.S | 47 +++++++++++++++++++++++++++++++++++++- 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index d1ebd46872fd..f7f38b1aab14 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -95,6 +95,14 @@ config DEBUG_ALIGN_RODATA If in doubt, say N. +config DEBUG_EFI + depends on EFI && DEBUG_INFO + bool "UEFI debugging" + help + Enable this option to include EFI specific debugging features into + the kernel that are only useful when using a debug build of the + UEFI firmware + source "drivers/hwtracing/coresight/Kconfig" endmenu diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 7d66bbaafc0c..1606c6b2a280 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -55,3 +55,7 @@ obj-y += $(arm64-obj-y) vdso/ probes/ obj-m += $(arm64-obj-m) head-y := head.o extra-y += $(head-y) vmlinux.lds + +ifeq ($(CONFIG_DEBUG_EFI),y) +AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\"" +endif diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index d3b8f92de5c5..c6cc82ec190b 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -149,7 +149,7 @@ extra_header_fields: .quad 0 // SizeOfHeapReserve .quad 0 // SizeOfHeapCommit .long 0 // LoaderFlags - .long 0x6 // NumberOfRvaAndSizes + .long (section_table - .) / 8 // NumberOfRvaAndSizes .quad 0 // ExportTable .quad 0 // ImportTable @@ -158,6 +158,11 @@ extra_header_fields: .quad 0 // CertificationTable .quad 0 // BaseRelocationTable +#ifdef CONFIG_DEBUG_EFI + .long efi_debug_table - _head // DebugTable + .long efi_debug_table_size +#endif + // Section table section_table: @@ -195,6 +200,46 @@ section_table: .short 0 // NumberOfLineNumbers (0 for executables) .long 0xe0500020 // Characteristics (section flags) +#ifdef CONFIG_DEBUG_EFI + /* + * The debug table is referenced via its Relative Virtual Address (RVA), + * which is only defined for those parts of the image that are covered + * by a section declaration. Since this header is not covered by any + * section, the debug table must be emitted elsewhere. So stick it in + * the .init.rodata section instead. + * + * Note that the EFI debug entry itself may legally have a zero RVA, + * which means we can simply put it right after the section headers. + */ + __INITRODATA + + .align 2 +efi_debug_table: + // EFI_IMAGE_DEBUG_DIRECTORY_ENTRY + .long 0 // Characteristics + .long 0 // TimeDateStamp + .short 0 // MajorVersion + .short 0 // MinorVersion + .long 2 // Type == EFI_IMAGE_DEBUG_TYPE_CODEVIEW + .long efi_debug_entry_size // SizeOfData + .long 0 // RVA + .long efi_debug_entry - _head // FileOffset + + .set efi_debug_table_size, . - efi_debug_table + .previous + +efi_debug_entry: + // EFI_IMAGE_DEBUG_CODEVIEW_NB10_ENTRY + .ascii "NB10" // Signature + .long 0 // Unknown + .long 0 // Unknown2 + .long 0 // Unknown3 + + .asciz VMLINUX_PATH + + .set efi_debug_entry_size, . - efi_debug_entry +#endif + /* * EFI will load .text onwards at the 4k section alignment * described in the PE/COFF header. To ensure that instruction From 680a0873e193bae666439f4b5e32c758e68f114c Mon Sep 17 00:00:00 2001 From: Andy Gross Date: Wed, 1 Feb 2017 11:28:27 -0600 Subject: [PATCH 52/74] arm: kernel: Add SMC structure parameter This patch adds a quirk parameter to the arm_smccc_(smc/hvc) calls. The quirk structure allows for specialized SMC operations due to SoC specific requirements. The current arm_smccc_(smc/hvc) is renamed and macros are used instead to specify the standard arm_smccc_(smc/hvc) or the arm_smccc_(smc/hvc)_quirk function. This patch and partial implementation was suggested by Will Deacon. Signed-off-by: Andy Gross Reviewed-by: Will Deacon Signed-off-by: Will Deacon --- arch/arm/kernel/armksyms.c | 4 ++-- arch/arm/kernel/smccc-call.S | 14 +++++++----- arch/arm64/kernel/arm64ksyms.c | 4 ++-- arch/arm64/kernel/asm-offsets.c | 7 ++++-- arch/arm64/kernel/smccc-call.S | 14 +++++++----- include/linux/arm-smccc.h | 40 ++++++++++++++++++++++++++------- 6 files changed, 57 insertions(+), 26 deletions(-) diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 7e45f69a0ddc..8e8d20cdbce7 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -178,6 +178,6 @@ EXPORT_SYMBOL(__pv_offset); #endif #ifdef CONFIG_HAVE_ARM_SMCCC -EXPORT_SYMBOL(arm_smccc_smc); -EXPORT_SYMBOL(arm_smccc_hvc); +EXPORT_SYMBOL(__arm_smccc_smc); +EXPORT_SYMBOL(__arm_smccc_hvc); #endif diff --git a/arch/arm/kernel/smccc-call.S b/arch/arm/kernel/smccc-call.S index 2e48b674aab1..e5d43066b889 100644 --- a/arch/arm/kernel/smccc-call.S +++ b/arch/arm/kernel/smccc-call.S @@ -46,17 +46,19 @@ UNWIND( .fnend) /* * void smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, * unsigned long a3, unsigned long a4, unsigned long a5, - * unsigned long a6, unsigned long a7, struct arm_smccc_res *res) + * unsigned long a6, unsigned long a7, struct arm_smccc_res *res, + * struct arm_smccc_quirk *quirk) */ -ENTRY(arm_smccc_smc) +ENTRY(__arm_smccc_smc) SMCCC SMCCC_SMC -ENDPROC(arm_smccc_smc) +ENDPROC(__arm_smccc_smc) /* * void smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2, * unsigned long a3, unsigned long a4, unsigned long a5, - * unsigned long a6, unsigned long a7, struct arm_smccc_res *res) + * unsigned long a6, unsigned long a7, struct arm_smccc_res *res, + * struct arm_smccc_quirk *quirk) */ -ENTRY(arm_smccc_hvc) +ENTRY(__arm_smccc_hvc) SMCCC SMCCC_HVC -ENDPROC(arm_smccc_hvc) +ENDPROC(__arm_smccc_hvc) diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 78f368039c79..e9c4dc9e0ada 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -73,5 +73,5 @@ NOKPROBE_SYMBOL(_mcount); #endif /* arm-smccc */ -EXPORT_SYMBOL(arm_smccc_smc); -EXPORT_SYMBOL(arm_smccc_hvc); +EXPORT_SYMBOL(__arm_smccc_smc); +EXPORT_SYMBOL(__arm_smccc_hvc); diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index bc049afc73a7..b3bb7ef97bc8 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -143,8 +143,11 @@ int main(void) DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS, offsetof(struct sleep_stack_data, system_regs)); DEFINE(SLEEP_STACK_DATA_CALLEE_REGS, offsetof(struct sleep_stack_data, callee_saved_regs)); #endif - DEFINE(ARM_SMCCC_RES_X0_OFFS, offsetof(struct arm_smccc_res, a0)); - DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2)); + DEFINE(ARM_SMCCC_RES_X0_OFFS, offsetof(struct arm_smccc_res, a0)); + DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2)); + DEFINE(ARM_SMCCC_QUIRK_ID_OFFS, offsetof(struct arm_smccc_quirk, id)); + DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS, offsetof(struct arm_smccc_quirk, state)); + BLANK(); DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address)); DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address)); diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S index ae0496fa4235..ba60a8cb07d2 100644 --- a/arch/arm64/kernel/smccc-call.S +++ b/arch/arm64/kernel/smccc-call.S @@ -27,17 +27,19 @@ /* * void arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, * unsigned long a3, unsigned long a4, unsigned long a5, - * unsigned long a6, unsigned long a7, struct arm_smccc_res *res) + * unsigned long a6, unsigned long a7, struct arm_smccc_res *res, + * struct arm_smccc_quirk *quirk) */ -ENTRY(arm_smccc_smc) +ENTRY(__arm_smccc_smc) SMCCC smc -ENDPROC(arm_smccc_smc) +ENDPROC(__arm_smccc_smc) /* * void arm_smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2, * unsigned long a3, unsigned long a4, unsigned long a5, - * unsigned long a6, unsigned long a7, struct arm_smccc_res *res) + * unsigned long a6, unsigned long a7, struct arm_smccc_res *res, + * struct arm_smccc_quirk *quirk) */ -ENTRY(arm_smccc_hvc) +ENTRY(__arm_smccc_hvc) SMCCC hvc -ENDPROC(arm_smccc_hvc) +ENDPROC(__arm_smccc_hvc) diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index b5abfda80465..c66f8ae94b5a 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -72,33 +72,57 @@ struct arm_smccc_res { }; /** - * arm_smccc_smc() - make SMC calls + * struct arm_smccc_quirk - Contains quirk information + * @id: quirk identification + * @state: quirk specific information + * @a6: Qualcomm quirk entry for returning post-smc call contents of a6 + */ +struct arm_smccc_quirk { + int id; + union { + unsigned long a6; + } state; +}; + +/** + * __arm_smccc_smc() - make SMC calls * @a0-a7: arguments passed in registers 0 to 7 * @res: result values from registers 0 to 3 + * @quirk: points to an arm_smccc_quirk, or NULL when no quirks are required. * * This function is used to make SMC calls following SMC Calling Convention. * The content of the supplied param are copied to registers 0 to 7 prior * to the SMC instruction. The return values are updated with the content - * from register 0 to 3 on return from the SMC instruction. + * from register 0 to 3 on return from the SMC instruction. An optional + * quirk structure provides vendor specific behavior. */ -asmlinkage void arm_smccc_smc(unsigned long a0, unsigned long a1, +asmlinkage void __arm_smccc_smc(unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4, unsigned long a5, unsigned long a6, unsigned long a7, - struct arm_smccc_res *res); + struct arm_smccc_res *res, struct arm_smccc_quirk *quirk); /** - * arm_smccc_hvc() - make HVC calls + * __arm_smccc_hvc() - make HVC calls * @a0-a7: arguments passed in registers 0 to 7 * @res: result values from registers 0 to 3 * * This function is used to make HVC calls following SMC Calling * Convention. The content of the supplied param are copied to registers 0 * to 7 prior to the HVC instruction. The return values are updated with - * the content from register 0 to 3 on return from the HVC instruction. + * the content from register 0 to 3 on return from the HVC instruction. An + * optional quirk structure provides vendor specific behavior. */ -asmlinkage void arm_smccc_hvc(unsigned long a0, unsigned long a1, +asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4, unsigned long a5, unsigned long a6, unsigned long a7, - struct arm_smccc_res *res); + struct arm_smccc_res *res, struct arm_smccc_quirk *quirk); + +#define arm_smccc_smc(...) __arm_smccc_smc(__VA_ARGS__, NULL) + +#define arm_smccc_smc_quirk(...) __arm_smccc_smc(__VA_ARGS__) + +#define arm_smccc_hvc(...) __arm_smccc_hvc(__VA_ARGS__, NULL) + +#define arm_smccc_hvc_quirk(...) __arm_smccc_hvc(__VA_ARGS__) #endif /*__LINUX_ARM_SMCCC_H*/ From 82bcd087029f6056506ea929f11af02622230901 Mon Sep 17 00:00:00 2001 From: Andy Gross Date: Wed, 1 Feb 2017 11:28:28 -0600 Subject: [PATCH 53/74] firmware: qcom: scm: Fix interrupted SCM calls This patch adds a Qualcomm specific quirk to the arm_smccc_smc call. On Qualcomm ARM64 platforms, the SMC call can return before it has completed. If this occurs, the call can be restarted, but it requires using the returned session ID value from the interrupted SMC call. The quirk stores off the session ID from the interrupted call in the quirk structure so that it can be used by the caller. This patch folds in a fix given by Sricharan R: https://lkml.org/lkml/2016/9/28/272 Signed-off-by: Andy Gross Reviewed-by: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/smccc-call.S | 9 ++++++++- drivers/firmware/qcom_scm-64.c | 13 ++++++++++--- include/linux/arm-smccc.h | 11 ++++++++--- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S index ba60a8cb07d2..62522342e1e4 100644 --- a/arch/arm64/kernel/smccc-call.S +++ b/arch/arm64/kernel/smccc-call.S @@ -12,6 +12,7 @@ * */ #include +#include #include .macro SMCCC instr @@ -20,7 +21,13 @@ ldr x4, [sp] stp x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS] stp x2, x3, [x4, #ARM_SMCCC_RES_X2_OFFS] - ret + ldr x4, [sp, #8] + cbz x4, 1f /* no quirk structure */ + ldr x9, [x4, #ARM_SMCCC_QUIRK_ID_OFFS] + cmp x9, #ARM_SMCCC_QUIRK_QCOM_A6 + b.ne 1f + str x6, [x4, ARM_SMCCC_QUIRK_STATE_OFFS] +1: ret .cfi_endproc .endm diff --git a/drivers/firmware/qcom_scm-64.c b/drivers/firmware/qcom_scm-64.c index 4a0f5ead4fb5..1e2e5198db53 100644 --- a/drivers/firmware/qcom_scm-64.c +++ b/drivers/firmware/qcom_scm-64.c @@ -91,6 +91,7 @@ static int qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id, dma_addr_t args_phys = 0; void *args_virt = NULL; size_t alloc_len; + struct arm_smccc_quirk quirk = {.id = ARM_SMCCC_QUIRK_QCOM_A6}; if (unlikely(arglen > N_REGISTER_ARGS)) { alloc_len = N_EXT_QCOM_SCM_ARGS * sizeof(u64); @@ -131,10 +132,16 @@ static int qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id, qcom_smccc_convention, ARM_SMCCC_OWNER_SIP, fn_id); + quirk.state.a6 = 0; + do { - arm_smccc_smc(cmd, desc->arginfo, desc->args[0], - desc->args[1], desc->args[2], x5, 0, 0, - res); + arm_smccc_smc_quirk(cmd, desc->arginfo, desc->args[0], + desc->args[1], desc->args[2], x5, + quirk.state.a6, 0, res, &quirk); + + if (res->a0 == QCOM_SCM_INTERRUPTED) + cmd = res->a0; + } while (res->a0 == QCOM_SCM_INTERRUPTED); mutex_unlock(&qcom_scm_lock); diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index c66f8ae94b5a..b67934164401 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -14,9 +14,6 @@ #ifndef __LINUX_ARM_SMCCC_H #define __LINUX_ARM_SMCCC_H -#include -#include - /* * This file provides common defines for ARM SMC Calling Convention as * specified in @@ -60,6 +57,13 @@ #define ARM_SMCCC_OWNER_TRUSTED_OS 50 #define ARM_SMCCC_OWNER_TRUSTED_OS_END 63 +#define ARM_SMCCC_QUIRK_NONE 0 +#define ARM_SMCCC_QUIRK_QCOM_A6 1 /* Save/restore register a6 */ + +#ifndef __ASSEMBLY__ + +#include +#include /** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 @@ -125,4 +129,5 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, #define arm_smccc_hvc_quirk(...) __arm_smccc_hvc(__VA_ARGS__) +#endif /*__ASSEMBLY__*/ #endif /*__LINUX_ARM_SMCCC_H*/ From b128cb55f066caeb9859b99795d78e9213a58d2a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 23 Nov 2016 22:39:52 +0800 Subject: [PATCH 54/74] arm: perf: use builtin_platform_driver Use builtin_platform_driver() helper to simplify the code. Signed-off-by: Geliang Tang Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_v6.c | 6 +----- arch/arm/kernel/perf_event_v7.c | 6 +----- arch/arm/kernel/perf_event_xscale.c | 6 +----- 3 files changed, 3 insertions(+), 15 deletions(-) diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 09413e7b49aa..96b7a477a8db 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -581,9 +581,5 @@ static struct platform_driver armv6_pmu_driver = { .probe = armv6_pmu_device_probe, }; -static int __init register_armv6_pmu_driver(void) -{ - return platform_driver_register(&armv6_pmu_driver); -} -device_initcall(register_armv6_pmu_driver); +builtin_platform_driver(armv6_pmu_driver); #endif /* CONFIG_CPU_V6 || CONFIG_CPU_V6K */ diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index b9423491b9d7..ab6522b43659 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -2034,9 +2034,5 @@ static struct platform_driver armv7_pmu_driver = { .probe = armv7_pmu_device_probe, }; -static int __init register_armv7_pmu_driver(void) -{ - return platform_driver_register(&armv7_pmu_driver); -} -device_initcall(register_armv7_pmu_driver); +builtin_platform_driver(armv7_pmu_driver); #endif /* CONFIG_CPU_V7 */ diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index aa0499e2eef7..0e51f5e4f879 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -767,9 +767,5 @@ static struct platform_driver xscale_pmu_driver = { .probe = xscale_pmu_device_probe, }; -static int __init register_xscale_pmu_driver(void) -{ - return platform_driver_register(&xscale_pmu_driver); -} -device_initcall(register_xscale_pmu_driver); +builtin_platform_driver(xscale_pmu_driver); #endif /* CONFIG_CPU_XSCALE */ From c0bfc549e96231e0ead4424de6e4933fde819d70 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 25 Jan 2017 15:46:58 -0800 Subject: [PATCH 55/74] perf: xgene: Include module.h I ran into a build error when I disabled CONFIG_ACPI and tried to compile this driver: drivers/perf/xgene_pmu.c:1242:1: warning: data definition has no type or storage class MODULE_DEVICE_TABLE(of, xgene_pmu_of_match); ^ drivers/perf/xgene_pmu.c:1242:1: error: type defaults to 'int' in declaration of 'MODULE_DEVICE_TABLE' [-Werror=implicit-int] Include module.h for the MODULE_DEVICE_TABLE macro that's implicitly included through ACPI. Tested-by: Tai Nguyen Signed-off-by: Stephen Boyd Signed-off-by: Will Deacon --- drivers/perf/xgene_pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index a8ac4bcef2c0..35b5289bc5da 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include From 6d526ee26ccdcbf8eb9ddd0b584b35ac17f8c566 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 14 Dec 2016 09:11:47 +0000 Subject: [PATCH 56/74] arm64: mm: enable CONFIG_HOLES_IN_ZONE for NUMA The NUMA code may get confused by the presence of NOMAP regions within zones, resulting in spurious BUG() checks where the node id deviates from the containing zone's node id. Since the kernel has no business reasoning about node ids of pages it does not own in the first place, enable CONFIG_HOLES_IN_ZONE to ensure that such pages are disregarded. Acked-by: Robert Richter Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0ce23130cc9b..f45405664558 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -625,6 +625,10 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK def_bool y depends on NUMA +config HOLES_IN_ZONE + def_bool y + depends on NUMA + source kernel/Kconfig.preempt source kernel/Kconfig.hz From 030abd8a5d33057524c6c11c28d3191f3b5c63f6 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 5 Jan 2017 17:32:16 +0000 Subject: [PATCH 57/74] ACPI/IORT: Fix iort_node_get_id() mapping entries indexing Commit 618f535a6062 ("ACPI/IORT: Add single mapping function") introduced a function (iort_node_get_id()) to retrieve ids for IORT named components. The iort_node_get_id() takes an index as input to refer to a specific mapping entry in the named component IORT node mapping array. For a mapping entry at a given index, iort_node_get_id() should return the id value (through the id_out function parameter) and the IORT node output_reference (through function return value) the given mapping entry refers to. Technically output_reference values may differ for different map entries, (see diagram below - mapped id values may refer to different eg IORT SMMU nodes; the kernel may not be able to handle different output_reference values for a given named component but the IORT kernel layer should still report the IORT mappings as reported by firmware) but current code in iort_node_get_id() fails to use the index function parameter to return the correct output_reference value (ie it always returns the output_reference value of the first entry in the mapping array whilst using the index correctly to retrieve the id value from the respective entry). |----------------------| | named component | |----------------------| | map entry[0] | |----------------------| | id value | | output_reference----------------> eg SMMU 1 |----------------------| | map entry[1] | |----------------------| | id value | | output_reference----------------> eg SMMU 2 |----------------------| . . . |----------------------| | map entry[N] | |----------------------| | id value | | output_reference----------------> eg SMMU 1 |----------------------| Consequently the iort_node_get_id() function always returns the IORT node pointed at by the output_reference value of the first named component mapping array entry, irrespective of the index parameter, which is a bug. Update the map array entry pointer computation in iort_node_get_id() to take into account the index value, fixing the issue. Fixes: 618f535a6062 ("ACPI/IORT: Add single mapping function") Reported-by: Hanjun Guo Reviewed-by: Hanjun Guo Signed-off-by: Lorenzo Pieralisi Cc: Hanjun Guo Cc: Sinan Kaya Cc: Tomasz Nowicki Cc: Nate Watterson Cc: "Rafael J. Wysocki" Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index e0d2e6e6e40c..ba156c5afc16 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -333,7 +333,7 @@ struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node, return NULL; map = ACPI_ADD_PTR(struct acpi_iort_id_mapping, node, - node->mapping_offset); + node->mapping_offset + index * sizeof(*map)); /* Firmware bug! */ if (!map->output_reference) { @@ -348,10 +348,10 @@ struct acpi_iort_node *iort_node_get_id(struct acpi_iort_node *node, if (!(IORT_TYPE_MASK(parent->type) & type_mask)) return NULL; - if (map[index].flags & ACPI_IORT_ID_SINGLE_MAPPING) { + if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) { if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT || node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) { - *id_out = map[index].output_base; + *id_out = map->output_base; return parent; } } From 5e5afa6cbdae5cc2ceda232a68a9ddc943d1d5ce Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 17 Jan 2017 16:36:23 +0300 Subject: [PATCH 58/74] ACPI/IORT: Fix the error return code in iort_add_smmu_platform_device() The function iort_add_smmu_platform_device() accidentally returns 0 (ie PTR_ERR(pdev) where pdev == NULL) if platform_device_alloc() fails; fix the bug by returning a proper error value. Fixes: 846f0e9e74a0 ("ACPI/IORT: Add support for ARM SMMU platform devices creation") Acked-by: Hanjun Guo Signed-off-by: Dan Carpenter [lorenzo.pieralisi@arm.com: improved commit log] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index ba156c5afc16..6d78b467d0ec 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -828,7 +828,7 @@ static int __init iort_add_smmu_platform_device(struct acpi_iort_node *node) pdev = platform_device_alloc(ops->name, PLATFORM_DEVID_AUTO); if (!pdev) - return PTR_ERR(pdev); + return -ENOMEM; count = ops->iommu_count_resources(node); From b7eed6ddaa71f1a9afe2ba481d69dea3af0e0755 Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Mon, 6 Feb 2017 19:46:22 +0530 Subject: [PATCH 59/74] arm64: do not trace atomic operations Atomic operation function symbols are exported,when CONFIG_ARM64_LSE_ATOMICS is defined. Prefix them with notrace, so that an user can not trace these functions. Tracing these functions causes kernel crash. Signed-off-by: Pratyush Anand Signed-off-by: Will Deacon --- arch/arm64/include/asm/lse.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index fc756e22c84c..606b20910a5c 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -19,7 +19,7 @@ __asm__(".arch_extension lse"); /* Move the ll/sc atomics out-of-line */ -#define __LL_SC_INLINE +#define __LL_SC_INLINE notrace #define __LL_SC_PREFIX(x) __ll_sc_##x #define __LL_SC_EXPORT(x) EXPORT_SYMBOL(__LL_SC_PREFIX(x)) From 3046ec674d441562c6bb3e4284cd866743042ef3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 8 Feb 2017 14:54:12 +0000 Subject: [PATCH 60/74] ARM: smccc: Update HVC comment to describe new quirk parameter Commit 680a0873e193 ("arm: kernel: Add SMC structure parameter") added a new "quirk" parameter to the SMC and HVC SMCCC backends, but only updated the comment for the SMC version. This patch adds the new paramater to the comment describing the HVC version too. Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index b67934164401..4c5bca38c653 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -109,6 +109,7 @@ asmlinkage void __arm_smccc_smc(unsigned long a0, unsigned long a1, * __arm_smccc_hvc() - make HVC calls * @a0-a7: arguments passed in registers 0 to 7 * @res: result values from registers 0 to 3 + * @quirk: points to an arm_smccc_quirk, or NULL when no quirks are required. * * This function is used to make HVC calls following SMC Calling * Convention. The content of the supplied param are copied to registers 0 From fe0a7ef74d1f65b86820a54636323df2b31e0c3f Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 6 Feb 2017 16:00:09 +0000 Subject: [PATCH 61/74] arm64: remove wrong CONFIG_PROC_SYSCTL ifdef The sysfs cpu_capacity entry for each CPU has nothing to do with PROC_FS, nor it's in /proc/sys path. Remove such ifdef. Cc: Will Deacon Cc: Catalin Marinas Reported-and-suggested-by: Sudeep Holla Fixes: be8f185d8af4 ('arm64: add sysfs cpu_capacity attribute') Signed-off-by: Juri Lelli Signed-off-by: Will Deacon --- arch/arm64/kernel/topology.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 23e9e13bd2aa..62b370388d72 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -40,7 +40,6 @@ static void set_capacity_scale(unsigned int cpu, unsigned long capacity) per_cpu(cpu_scale, cpu) = capacity; } -#ifdef CONFIG_PROC_SYSCTL static ssize_t cpu_capacity_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -97,7 +96,6 @@ static int register_cpu_capacity_sysctl(void) return 0; } subsys_initcall(register_cpu_capacity_sysctl); -#endif static u32 capacity_scale; static u32 *raw_capacity; From 21bdbb7102edeaebb5ec4ef530c8f442f7562c96 Mon Sep 17 00:00:00 2001 From: Neil Leeder Date: Tue, 7 Feb 2017 13:14:04 -0500 Subject: [PATCH 62/74] perf: add qcom l2 cache perf events driver Adds perf events support for L2 cache PMU. The L2 cache PMU driver is named 'l2cache_0' and can be used with perf events to profile L2 events such as cache hits and misses on Qualcomm Technologies processors. Reviewed-by: Mark Rutland Signed-off-by: Neil Leeder [will: minimise nesting in l2_cache_associate_cpu_with_cluster] [will: use kstrtoul for unsigned long, remove redunant .owner setting] Signed-off-by: Will Deacon --- Documentation/perf/qcom_l2_pmu.txt | 38 ++ drivers/perf/Kconfig | 9 + drivers/perf/Makefile | 1 + drivers/perf/qcom_l2_pmu.c | 1013 ++++++++++++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 5 files changed, 1062 insertions(+) create mode 100644 Documentation/perf/qcom_l2_pmu.txt create mode 100644 drivers/perf/qcom_l2_pmu.c diff --git a/Documentation/perf/qcom_l2_pmu.txt b/Documentation/perf/qcom_l2_pmu.txt new file mode 100644 index 000000000000..b25b97659ab9 --- /dev/null +++ b/Documentation/perf/qcom_l2_pmu.txt @@ -0,0 +1,38 @@ +Qualcomm Technologies Level-2 Cache Performance Monitoring Unit (PMU) +===================================================================== + +This driver supports the L2 cache clusters found in Qualcomm Technologies +Centriq SoCs. There are multiple physical L2 cache clusters, each with their +own PMU. Each cluster has one or more CPUs associated with it. + +There is one logical L2 PMU exposed, which aggregates the results from +the physical PMUs. + +The driver provides a description of its available events and configuration +options in sysfs, see /sys/devices/l2cache_0. + +The "format" directory describes the format of the events. + +Events can be envisioned as a 2-dimensional array. Each column represents +a group of events. There are 8 groups. Only one entry from each +group can be in use at a time. If multiple events from the same group +are specified, the conflicting events cannot be counted at the same time. + +Events are specified as 0xCCG, where CC is 2 hex digits specifying +the code (array row) and G specifies the group (column) 0-7. + +In addition there is a cycle counter event specified by the value 0xFE +which is outside the above scheme. + +The driver provides a "cpumask" sysfs attribute which contains a mask +consisting of one CPU per cluster which will be used to handle all the PMU +events on that cluster. + +Examples for use with perf: + + perf stat -e l2cache_0/config=0x001/,l2cache_0/config=0x042/ -a sleep 1 + + perf stat -e l2cache_0/config=0xfe/ -C 2 sleep 1 + +The driver does not support sampling, therefore "perf record" will +not work. Per-task perf sessions are not supported. diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 4d5c5f9f0dbd..93651907874f 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -12,6 +12,15 @@ config ARM_PMU Say y if you want to use CPU performance monitors on ARM-based systems. +config QCOM_L2_PMU + bool "Qualcomm Technologies L2-cache PMU" + depends on ARCH_QCOM && ARM64 && PERF_EVENTS && ACPI + help + Provides support for the L2 cache performance monitor unit (PMU) + in Qualcomm Technologies processors. + Adds the L2 cache PMU into the perf events subsystem for + monitoring L2 cache events. + config XGENE_PMU depends on PERF_EVENTS && ARCH_XGENE bool "APM X-Gene SoC PMU" diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index b116e982810b..ef24833c94a8 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_ARM_PMU) += arm_pmu.o +obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c new file mode 100644 index 000000000000..c259848228b4 --- /dev/null +++ b/drivers/perf/qcom_l2_pmu.c @@ -0,0 +1,1013 @@ +/* Copyright (c) 2015-2017 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define MAX_L2_CTRS 9 + +#define L2PMCR_NUM_EV_SHIFT 11 +#define L2PMCR_NUM_EV_MASK 0x1F + +#define L2PMCR 0x400 +#define L2PMCNTENCLR 0x403 +#define L2PMCNTENSET 0x404 +#define L2PMINTENCLR 0x405 +#define L2PMINTENSET 0x406 +#define L2PMOVSCLR 0x407 +#define L2PMOVSSET 0x408 +#define L2PMCCNTCR 0x409 +#define L2PMCCNTR 0x40A +#define L2PMCCNTSR 0x40C +#define L2PMRESR 0x410 +#define IA_L2PMXEVCNTCR_BASE 0x420 +#define IA_L2PMXEVCNTR_BASE 0x421 +#define IA_L2PMXEVFILTER_BASE 0x423 +#define IA_L2PMXEVTYPER_BASE 0x424 + +#define IA_L2_REG_OFFSET 0x10 + +#define L2PMXEVFILTER_SUFILTER_ALL 0x000E0000 +#define L2PMXEVFILTER_ORGFILTER_IDINDEP 0x00000004 +#define L2PMXEVFILTER_ORGFILTER_ALL 0x00000003 + +#define L2EVTYPER_REG_SHIFT 3 + +#define L2PMRESR_GROUP_BITS 8 +#define L2PMRESR_GROUP_MASK GENMASK(7, 0) + +#define L2CYCLE_CTR_BIT 31 +#define L2CYCLE_CTR_RAW_CODE 0xFE + +#define L2PMCR_RESET_ALL 0x6 +#define L2PMCR_COUNTERS_ENABLE 0x1 +#define L2PMCR_COUNTERS_DISABLE 0x0 + +#define L2PMRESR_EN BIT_ULL(63) + +#define L2_EVT_MASK 0x00000FFF +#define L2_EVT_CODE_MASK 0x00000FF0 +#define L2_EVT_GRP_MASK 0x0000000F +#define L2_EVT_CODE_SHIFT 4 +#define L2_EVT_GRP_SHIFT 0 + +#define L2_EVT_CODE(event) (((event) & L2_EVT_CODE_MASK) >> L2_EVT_CODE_SHIFT) +#define L2_EVT_GROUP(event) (((event) & L2_EVT_GRP_MASK) >> L2_EVT_GRP_SHIFT) + +#define L2_EVT_GROUP_MAX 7 + +#define L2_COUNTER_RELOAD BIT_ULL(31) +#define L2_CYCLE_COUNTER_RELOAD BIT_ULL(63) + +#define L2CPUSRSELR_EL1 sys_reg(3, 3, 15, 0, 6) +#define L2CPUSRDR_EL1 sys_reg(3, 3, 15, 0, 7) + +#define reg_idx(reg, i) (((i) * IA_L2_REG_OFFSET) + reg##_BASE) + +static DEFINE_RAW_SPINLOCK(l2_access_lock); + +/** + * set_l2_indirect_reg: write value to an L2 register + * @reg: Address of L2 register. + * @value: Value to be written to register. + * + * Use architecturally required barriers for ordering between system register + * accesses + */ +static void set_l2_indirect_reg(u64 reg, u64 val) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2_access_lock, flags); + write_sysreg_s(reg, L2CPUSRSELR_EL1); + isb(); + write_sysreg_s(val, L2CPUSRDR_EL1); + isb(); + raw_spin_unlock_irqrestore(&l2_access_lock, flags); +} + +/** + * get_l2_indirect_reg: read an L2 register value + * @reg: Address of L2 register. + * + * Use architecturally required barriers for ordering between system register + * accesses + */ +static u64 get_l2_indirect_reg(u64 reg) +{ + u64 val; + unsigned long flags; + + raw_spin_lock_irqsave(&l2_access_lock, flags); + write_sysreg_s(reg, L2CPUSRSELR_EL1); + isb(); + val = read_sysreg_s(L2CPUSRDR_EL1); + raw_spin_unlock_irqrestore(&l2_access_lock, flags); + + return val; +} + +struct cluster_pmu; + +/* + * Aggregate PMU. Implements the core pmu functions and manages + * the hardware PMUs. + */ +struct l2cache_pmu { + struct hlist_node node; + u32 num_pmus; + struct pmu pmu; + int num_counters; + cpumask_t cpumask; + struct platform_device *pdev; + struct cluster_pmu * __percpu *pmu_cluster; + struct list_head clusters; +}; + +/* + * The cache is made up of one or more clusters, each cluster has its own PMU. + * Each cluster is associated with one or more CPUs. + * This structure represents one of the hardware PMUs. + * + * Events can be envisioned as a 2-dimensional array. Each column represents + * a group of events. There are 8 groups. Only one entry from each + * group can be in use at a time. + * + * Events are specified as 0xCCG, where CC is 2 hex digits specifying + * the code (array row) and G specifies the group (column). + * + * In addition there is a cycle counter event specified by L2CYCLE_CTR_RAW_CODE + * which is outside the above scheme. + */ +struct cluster_pmu { + struct list_head next; + struct perf_event *events[MAX_L2_CTRS]; + struct l2cache_pmu *l2cache_pmu; + DECLARE_BITMAP(used_counters, MAX_L2_CTRS); + DECLARE_BITMAP(used_groups, L2_EVT_GROUP_MAX + 1); + int irq; + int cluster_id; + /* The CPU that is used for collecting events on this cluster */ + int on_cpu; + /* All the CPUs associated with this cluster */ + cpumask_t cluster_cpus; + spinlock_t pmu_lock; +}; + +#define to_l2cache_pmu(p) (container_of(p, struct l2cache_pmu, pmu)) + +static u32 l2_cycle_ctr_idx; +static u32 l2_counter_present_mask; + +static inline u32 idx_to_reg_bit(u32 idx) +{ + if (idx == l2_cycle_ctr_idx) + return BIT(L2CYCLE_CTR_BIT); + + return BIT(idx); +} + +static inline struct cluster_pmu *get_cluster_pmu( + struct l2cache_pmu *l2cache_pmu, int cpu) +{ + return *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu); +} + +static void cluster_pmu_reset(void) +{ + /* Reset all counters */ + set_l2_indirect_reg(L2PMCR, L2PMCR_RESET_ALL); + set_l2_indirect_reg(L2PMCNTENCLR, l2_counter_present_mask); + set_l2_indirect_reg(L2PMINTENCLR, l2_counter_present_mask); + set_l2_indirect_reg(L2PMOVSCLR, l2_counter_present_mask); +} + +static inline void cluster_pmu_enable(void) +{ + set_l2_indirect_reg(L2PMCR, L2PMCR_COUNTERS_ENABLE); +} + +static inline void cluster_pmu_disable(void) +{ + set_l2_indirect_reg(L2PMCR, L2PMCR_COUNTERS_DISABLE); +} + +static inline void cluster_pmu_counter_set_value(u32 idx, u64 value) +{ + if (idx == l2_cycle_ctr_idx) + set_l2_indirect_reg(L2PMCCNTR, value); + else + set_l2_indirect_reg(reg_idx(IA_L2PMXEVCNTR, idx), value); +} + +static inline u64 cluster_pmu_counter_get_value(u32 idx) +{ + u64 value; + + if (idx == l2_cycle_ctr_idx) + value = get_l2_indirect_reg(L2PMCCNTR); + else + value = get_l2_indirect_reg(reg_idx(IA_L2PMXEVCNTR, idx)); + + return value; +} + +static inline void cluster_pmu_counter_enable(u32 idx) +{ + set_l2_indirect_reg(L2PMCNTENSET, idx_to_reg_bit(idx)); +} + +static inline void cluster_pmu_counter_disable(u32 idx) +{ + set_l2_indirect_reg(L2PMCNTENCLR, idx_to_reg_bit(idx)); +} + +static inline void cluster_pmu_counter_enable_interrupt(u32 idx) +{ + set_l2_indirect_reg(L2PMINTENSET, idx_to_reg_bit(idx)); +} + +static inline void cluster_pmu_counter_disable_interrupt(u32 idx) +{ + set_l2_indirect_reg(L2PMINTENCLR, idx_to_reg_bit(idx)); +} + +static inline void cluster_pmu_set_evccntcr(u32 val) +{ + set_l2_indirect_reg(L2PMCCNTCR, val); +} + +static inline void cluster_pmu_set_evcntcr(u32 ctr, u32 val) +{ + set_l2_indirect_reg(reg_idx(IA_L2PMXEVCNTCR, ctr), val); +} + +static inline void cluster_pmu_set_evtyper(u32 ctr, u32 val) +{ + set_l2_indirect_reg(reg_idx(IA_L2PMXEVTYPER, ctr), val); +} + +static void cluster_pmu_set_resr(struct cluster_pmu *cluster, + u32 event_group, u32 event_cc) +{ + u64 field; + u64 resr_val; + u32 shift; + unsigned long flags; + + shift = L2PMRESR_GROUP_BITS * event_group; + field = ((u64)(event_cc & L2PMRESR_GROUP_MASK) << shift); + + spin_lock_irqsave(&cluster->pmu_lock, flags); + + resr_val = get_l2_indirect_reg(L2PMRESR); + resr_val &= ~(L2PMRESR_GROUP_MASK << shift); + resr_val |= field; + resr_val |= L2PMRESR_EN; + set_l2_indirect_reg(L2PMRESR, resr_val); + + spin_unlock_irqrestore(&cluster->pmu_lock, flags); +} + +/* + * Hardware allows filtering of events based on the originating + * CPU. Turn this off by setting filter bits to allow events from + * all CPUS, subunits and ID independent events in this cluster. + */ +static inline void cluster_pmu_set_evfilter_sys_mode(u32 ctr) +{ + u32 val = L2PMXEVFILTER_SUFILTER_ALL | + L2PMXEVFILTER_ORGFILTER_IDINDEP | + L2PMXEVFILTER_ORGFILTER_ALL; + + set_l2_indirect_reg(reg_idx(IA_L2PMXEVFILTER, ctr), val); +} + +static inline u32 cluster_pmu_getreset_ovsr(void) +{ + u32 result = get_l2_indirect_reg(L2PMOVSSET); + + set_l2_indirect_reg(L2PMOVSCLR, result); + return result; +} + +static inline bool cluster_pmu_has_overflowed(u32 ovsr) +{ + return !!(ovsr & l2_counter_present_mask); +} + +static inline bool cluster_pmu_counter_has_overflowed(u32 ovsr, u32 idx) +{ + return !!(ovsr & idx_to_reg_bit(idx)); +} + +static void l2_cache_event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 delta, prev, now; + u32 idx = hwc->idx; + + do { + prev = local64_read(&hwc->prev_count); + now = cluster_pmu_counter_get_value(idx); + } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev); + + /* + * The cycle counter is 64-bit, but all other counters are + * 32-bit, and we must handle 32-bit overflow explicitly. + */ + delta = now - prev; + if (idx != l2_cycle_ctr_idx) + delta &= 0xffffffff; + + local64_add(delta, &event->count); +} + +static void l2_cache_cluster_set_period(struct cluster_pmu *cluster, + struct hw_perf_event *hwc) +{ + u32 idx = hwc->idx; + u64 new; + + /* + * We limit the max period to half the max counter value so + * that even in the case of extreme interrupt latency the + * counter will (hopefully) not wrap past its initial value. + */ + if (idx == l2_cycle_ctr_idx) + new = L2_CYCLE_COUNTER_RELOAD; + else + new = L2_COUNTER_RELOAD; + + local64_set(&hwc->prev_count, new); + cluster_pmu_counter_set_value(idx, new); +} + +static int l2_cache_get_event_idx(struct cluster_pmu *cluster, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx; + int num_ctrs = cluster->l2cache_pmu->num_counters - 1; + unsigned int group; + + if (hwc->config_base == L2CYCLE_CTR_RAW_CODE) { + if (test_and_set_bit(l2_cycle_ctr_idx, cluster->used_counters)) + return -EAGAIN; + + return l2_cycle_ctr_idx; + } + + idx = find_first_zero_bit(cluster->used_counters, num_ctrs); + if (idx == num_ctrs) + /* The counters are all in use. */ + return -EAGAIN; + + /* + * Check for column exclusion: event column already in use by another + * event. This is for events which are not in the same group. + * Conflicting events in the same group are detected in event_init. + */ + group = L2_EVT_GROUP(hwc->config_base); + if (test_bit(group, cluster->used_groups)) + return -EAGAIN; + + set_bit(idx, cluster->used_counters); + set_bit(group, cluster->used_groups); + + return idx; +} + +static void l2_cache_clear_event_idx(struct cluster_pmu *cluster, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + clear_bit(idx, cluster->used_counters); + if (hwc->config_base != L2CYCLE_CTR_RAW_CODE) + clear_bit(L2_EVT_GROUP(hwc->config_base), cluster->used_groups); +} + +static irqreturn_t l2_cache_handle_irq(int irq_num, void *data) +{ + struct cluster_pmu *cluster = data; + int num_counters = cluster->l2cache_pmu->num_counters; + u32 ovsr; + int idx; + + ovsr = cluster_pmu_getreset_ovsr(); + if (!cluster_pmu_has_overflowed(ovsr)) + return IRQ_NONE; + + for_each_set_bit(idx, cluster->used_counters, num_counters) { + struct perf_event *event = cluster->events[idx]; + struct hw_perf_event *hwc; + + if (WARN_ON_ONCE(!event)) + continue; + + if (!cluster_pmu_counter_has_overflowed(ovsr, idx)) + continue; + + l2_cache_event_update(event); + hwc = &event->hw; + + l2_cache_cluster_set_period(cluster, hwc); + } + + return IRQ_HANDLED; +} + +/* + * Implementation of abstract pmu functionality required by + * the core perf events code. + */ + +static void l2_cache_pmu_enable(struct pmu *pmu) +{ + /* + * Although there is only one PMU (per socket) controlling multiple + * physical PMUs (per cluster), because we do not support per-task mode + * each event is associated with a CPU. Each event has pmu_enable + * called on its CPU, so here it is only necessary to enable the + * counters for the current CPU. + */ + + cluster_pmu_enable(); +} + +static void l2_cache_pmu_disable(struct pmu *pmu) +{ + cluster_pmu_disable(); +} + +static int l2_cache_event_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct cluster_pmu *cluster; + struct perf_event *sibling; + struct l2cache_pmu *l2cache_pmu; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + l2cache_pmu = to_l2cache_pmu(event->pmu); + + if (hwc->sample_period) { + dev_dbg_ratelimited(&l2cache_pmu->pdev->dev, + "Sampling not supported\n"); + return -EOPNOTSUPP; + } + + if (event->cpu < 0) { + dev_dbg_ratelimited(&l2cache_pmu->pdev->dev, + "Per-task mode not supported\n"); + return -EOPNOTSUPP; + } + + /* We cannot filter accurately so we just don't allow it. */ + if (event->attr.exclude_user || event->attr.exclude_kernel || + event->attr.exclude_hv || event->attr.exclude_idle) { + dev_dbg_ratelimited(&l2cache_pmu->pdev->dev, + "Can't exclude execution levels\n"); + return -EOPNOTSUPP; + } + + if (((L2_EVT_GROUP(event->attr.config) > L2_EVT_GROUP_MAX) || + ((event->attr.config & ~L2_EVT_MASK) != 0)) && + (event->attr.config != L2CYCLE_CTR_RAW_CODE)) { + dev_dbg_ratelimited(&l2cache_pmu->pdev->dev, + "Invalid config %llx\n", + event->attr.config); + return -EINVAL; + } + + /* Don't allow groups with mixed PMUs, except for s/w events */ + if (event->group_leader->pmu != event->pmu && + !is_software_event(event->group_leader)) { + dev_dbg_ratelimited(&l2cache_pmu->pdev->dev, + "Can't create mixed PMU group\n"); + return -EINVAL; + } + + list_for_each_entry(sibling, &event->group_leader->sibling_list, + group_entry) + if (sibling->pmu != event->pmu && + !is_software_event(sibling)) { + dev_dbg_ratelimited(&l2cache_pmu->pdev->dev, + "Can't create mixed PMU group\n"); + return -EINVAL; + } + + cluster = get_cluster_pmu(l2cache_pmu, event->cpu); + if (!cluster) { + /* CPU has not been initialised */ + dev_dbg_ratelimited(&l2cache_pmu->pdev->dev, + "CPU%d not associated with L2 cluster\n", event->cpu); + return -EINVAL; + } + + /* Ensure all events in a group are on the same cpu */ + if ((event->group_leader != event) && + (cluster->on_cpu != event->group_leader->cpu)) { + dev_dbg_ratelimited(&l2cache_pmu->pdev->dev, + "Can't create group on CPUs %d and %d", + event->cpu, event->group_leader->cpu); + return -EINVAL; + } + + if ((event != event->group_leader) && + (L2_EVT_GROUP(event->group_leader->attr.config) == + L2_EVT_GROUP(event->attr.config))) { + dev_dbg_ratelimited(&l2cache_pmu->pdev->dev, + "Column exclusion: conflicting events %llx %llx\n", + event->group_leader->attr.config, + event->attr.config); + return -EINVAL; + } + + list_for_each_entry(sibling, &event->group_leader->sibling_list, + group_entry) { + if ((sibling != event) && + (L2_EVT_GROUP(sibling->attr.config) == + L2_EVT_GROUP(event->attr.config))) { + dev_dbg_ratelimited(&l2cache_pmu->pdev->dev, + "Column exclusion: conflicting events %llx %llx\n", + sibling->attr.config, + event->attr.config); + return -EINVAL; + } + } + + hwc->idx = -1; + hwc->config_base = event->attr.config; + + /* + * Ensure all events are on the same cpu so all events are in the + * same cpu context, to avoid races on pmu_enable etc. + */ + event->cpu = cluster->on_cpu; + + return 0; +} + +static void l2_cache_event_start(struct perf_event *event, int flags) +{ + struct cluster_pmu *cluster; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + u32 config; + u32 event_cc, event_group; + + hwc->state = 0; + + cluster = get_cluster_pmu(to_l2cache_pmu(event->pmu), event->cpu); + + l2_cache_cluster_set_period(cluster, hwc); + + if (hwc->config_base == L2CYCLE_CTR_RAW_CODE) { + cluster_pmu_set_evccntcr(0); + } else { + config = hwc->config_base; + event_cc = L2_EVT_CODE(config); + event_group = L2_EVT_GROUP(config); + + cluster_pmu_set_evcntcr(idx, 0); + cluster_pmu_set_evtyper(idx, event_group); + cluster_pmu_set_resr(cluster, event_group, event_cc); + cluster_pmu_set_evfilter_sys_mode(idx); + } + + cluster_pmu_counter_enable_interrupt(idx); + cluster_pmu_counter_enable(idx); +} + +static void l2_cache_event_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (hwc->state & PERF_HES_STOPPED) + return; + + cluster_pmu_counter_disable_interrupt(idx); + cluster_pmu_counter_disable(idx); + + if (flags & PERF_EF_UPDATE) + l2_cache_event_update(event); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; +} + +static int l2_cache_event_add(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + int idx; + int err = 0; + struct cluster_pmu *cluster; + + cluster = get_cluster_pmu(to_l2cache_pmu(event->pmu), event->cpu); + + idx = l2_cache_get_event_idx(cluster, event); + if (idx < 0) + return idx; + + hwc->idx = idx; + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + cluster->events[idx] = event; + local64_set(&hwc->prev_count, 0); + + if (flags & PERF_EF_START) + l2_cache_event_start(event, flags); + + /* Propagate changes to the userspace mapping. */ + perf_event_update_userpage(event); + + return err; +} + +static void l2_cache_event_del(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct cluster_pmu *cluster; + int idx = hwc->idx; + + cluster = get_cluster_pmu(to_l2cache_pmu(event->pmu), event->cpu); + + l2_cache_event_stop(event, flags | PERF_EF_UPDATE); + cluster->events[idx] = NULL; + l2_cache_clear_event_idx(cluster, event); + + perf_event_update_userpage(event); +} + +static void l2_cache_event_read(struct perf_event *event) +{ + l2_cache_event_update(event); +} + +static ssize_t l2_cache_pmu_cpumask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct l2cache_pmu *l2cache_pmu = to_l2cache_pmu(dev_get_drvdata(dev)); + + return cpumap_print_to_pagebuf(true, buf, &l2cache_pmu->cpumask); +} + +static struct device_attribute l2_cache_pmu_cpumask_attr = + __ATTR(cpumask, S_IRUGO, l2_cache_pmu_cpumask_show, NULL); + +static struct attribute *l2_cache_pmu_cpumask_attrs[] = { + &l2_cache_pmu_cpumask_attr.attr, + NULL, +}; + +static struct attribute_group l2_cache_pmu_cpumask_group = { + .attrs = l2_cache_pmu_cpumask_attrs, +}; + +/* CCG format for perf RAW codes. */ +PMU_FORMAT_ATTR(l2_code, "config:4-11"); +PMU_FORMAT_ATTR(l2_group, "config:0-3"); +static struct attribute *l2_cache_pmu_formats[] = { + &format_attr_l2_code.attr, + &format_attr_l2_group.attr, + NULL, +}; + +static struct attribute_group l2_cache_pmu_format_group = { + .name = "format", + .attrs = l2_cache_pmu_formats, +}; + +static const struct attribute_group *l2_cache_pmu_attr_grps[] = { + &l2_cache_pmu_format_group, + &l2_cache_pmu_cpumask_group, + NULL, +}; + +/* + * Generic device handlers + */ + +static const struct acpi_device_id l2_cache_pmu_acpi_match[] = { + { "QCOM8130", }, + { } +}; + +static int get_num_counters(void) +{ + int val; + + val = get_l2_indirect_reg(L2PMCR); + + /* + * Read number of counters from L2PMCR and add 1 + * for the cycle counter. + */ + return ((val >> L2PMCR_NUM_EV_SHIFT) & L2PMCR_NUM_EV_MASK) + 1; +} + +static struct cluster_pmu *l2_cache_associate_cpu_with_cluster( + struct l2cache_pmu *l2cache_pmu, int cpu) +{ + u64 mpidr; + int cpu_cluster_id; + struct cluster_pmu *cluster = NULL; + + /* + * This assumes that the cluster_id is in MPIDR[aff1] for + * single-threaded cores, and MPIDR[aff2] for multi-threaded + * cores. This logic will have to be updated if this changes. + */ + mpidr = read_cpuid_mpidr(); + if (mpidr & MPIDR_MT_BITMASK) + cpu_cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); + else + cpu_cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); + + list_for_each_entry(cluster, &l2cache_pmu->clusters, next) { + if (cluster->cluster_id != cpu_cluster_id) + continue; + + dev_info(&l2cache_pmu->pdev->dev, + "CPU%d associated with cluster %d\n", cpu, + cluster->cluster_id); + cpumask_set_cpu(cpu, &cluster->cluster_cpus); + *per_cpu_ptr(l2cache_pmu->pmu_cluster, cpu) = cluster; + break; + } + + return cluster; +} + +static int l2cache_pmu_online_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct cluster_pmu *cluster; + struct l2cache_pmu *l2cache_pmu; + + l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node); + cluster = get_cluster_pmu(l2cache_pmu, cpu); + if (!cluster) { + /* First time this CPU has come online */ + cluster = l2_cache_associate_cpu_with_cluster(l2cache_pmu, cpu); + if (!cluster) { + /* Only if broken firmware doesn't list every cluster */ + WARN_ONCE(1, "No L2 cache cluster for CPU%d\n", cpu); + return 0; + } + } + + /* If another CPU is managing this cluster, we're done */ + if (cluster->on_cpu != -1) + return 0; + + /* + * All CPUs on this cluster were down, use this one. + * Reset to put it into sane state. + */ + cluster->on_cpu = cpu; + cpumask_set_cpu(cpu, &l2cache_pmu->cpumask); + cluster_pmu_reset(); + + WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(cpu))); + enable_irq(cluster->irq); + + return 0; +} + +static int l2cache_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) +{ + struct cluster_pmu *cluster; + struct l2cache_pmu *l2cache_pmu; + cpumask_t cluster_online_cpus; + unsigned int target; + + l2cache_pmu = hlist_entry_safe(node, struct l2cache_pmu, node); + cluster = get_cluster_pmu(l2cache_pmu, cpu); + if (!cluster) + return 0; + + /* If this CPU is not managing the cluster, we're done */ + if (cluster->on_cpu != cpu) + return 0; + + /* Give up ownership of cluster */ + cpumask_clear_cpu(cpu, &l2cache_pmu->cpumask); + cluster->on_cpu = -1; + + /* Any other CPU for this cluster which is still online */ + cpumask_and(&cluster_online_cpus, &cluster->cluster_cpus, + cpu_online_mask); + target = cpumask_any_but(&cluster_online_cpus, cpu); + if (target >= nr_cpu_ids) { + disable_irq(cluster->irq); + return 0; + } + + perf_pmu_migrate_context(&l2cache_pmu->pmu, cpu, target); + cluster->on_cpu = target; + cpumask_set_cpu(target, &l2cache_pmu->cpumask); + WARN_ON(irq_set_affinity(cluster->irq, cpumask_of(target))); + + return 0; +} + +static int l2_cache_pmu_probe_cluster(struct device *dev, void *data) +{ + struct platform_device *pdev = to_platform_device(dev->parent); + struct platform_device *sdev = to_platform_device(dev); + struct l2cache_pmu *l2cache_pmu = data; + struct cluster_pmu *cluster; + struct acpi_device *device; + unsigned long fw_cluster_id; + int err; + int irq; + + if (acpi_bus_get_device(ACPI_HANDLE(dev), &device)) + return -ENODEV; + + if (kstrtoul(device->pnp.unique_id, 10, &fw_cluster_id) < 0) { + dev_err(&pdev->dev, "unable to read ACPI uid\n"); + return -ENODEV; + } + + cluster = devm_kzalloc(&pdev->dev, sizeof(*cluster), GFP_KERNEL); + if (!cluster) + return -ENOMEM; + + INIT_LIST_HEAD(&cluster->next); + list_add(&cluster->next, &l2cache_pmu->clusters); + cluster->cluster_id = fw_cluster_id; + + irq = platform_get_irq(sdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, + "Failed to get valid irq for cluster %ld\n", + fw_cluster_id); + return irq; + } + irq_set_status_flags(irq, IRQ_NOAUTOEN); + cluster->irq = irq; + + cluster->l2cache_pmu = l2cache_pmu; + cluster->on_cpu = -1; + + err = devm_request_irq(&pdev->dev, irq, l2_cache_handle_irq, + IRQF_NOBALANCING | IRQF_NO_THREAD, + "l2-cache-pmu", cluster); + if (err) { + dev_err(&pdev->dev, + "Unable to request IRQ%d for L2 PMU counters\n", irq); + return err; + } + + dev_info(&pdev->dev, + "Registered L2 cache PMU cluster %ld\n", fw_cluster_id); + + spin_lock_init(&cluster->pmu_lock); + + l2cache_pmu->num_pmus++; + + return 0; +} + +static int l2_cache_pmu_probe(struct platform_device *pdev) +{ + int err; + struct l2cache_pmu *l2cache_pmu; + + l2cache_pmu = + devm_kzalloc(&pdev->dev, sizeof(*l2cache_pmu), GFP_KERNEL); + if (!l2cache_pmu) + return -ENOMEM; + + INIT_LIST_HEAD(&l2cache_pmu->clusters); + + platform_set_drvdata(pdev, l2cache_pmu); + l2cache_pmu->pmu = (struct pmu) { + /* suffix is instance id for future use with multiple sockets */ + .name = "l2cache_0", + .task_ctx_nr = perf_invalid_context, + .pmu_enable = l2_cache_pmu_enable, + .pmu_disable = l2_cache_pmu_disable, + .event_init = l2_cache_event_init, + .add = l2_cache_event_add, + .del = l2_cache_event_del, + .start = l2_cache_event_start, + .stop = l2_cache_event_stop, + .read = l2_cache_event_read, + .attr_groups = l2_cache_pmu_attr_grps, + }; + + l2cache_pmu->num_counters = get_num_counters(); + l2cache_pmu->pdev = pdev; + l2cache_pmu->pmu_cluster = devm_alloc_percpu(&pdev->dev, + struct cluster_pmu *); + if (!l2cache_pmu->pmu_cluster) + return -ENOMEM; + + l2_cycle_ctr_idx = l2cache_pmu->num_counters - 1; + l2_counter_present_mask = GENMASK(l2cache_pmu->num_counters - 2, 0) | + BIT(L2CYCLE_CTR_BIT); + + cpumask_clear(&l2cache_pmu->cpumask); + + /* Read cluster info and initialize each cluster */ + err = device_for_each_child(&pdev->dev, l2cache_pmu, + l2_cache_pmu_probe_cluster); + if (err) + return err; + + if (l2cache_pmu->num_pmus == 0) { + dev_err(&pdev->dev, "No hardware L2 cache PMUs found\n"); + return -ENODEV; + } + + err = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, + &l2cache_pmu->node); + if (err) { + dev_err(&pdev->dev, "Error %d registering hotplug", err); + return err; + } + + err = perf_pmu_register(&l2cache_pmu->pmu, l2cache_pmu->pmu.name, -1); + if (err) { + dev_err(&pdev->dev, "Error %d registering L2 cache PMU\n", err); + goto out_unregister; + } + + dev_info(&pdev->dev, "Registered L2 cache PMU using %d HW PMUs\n", + l2cache_pmu->num_pmus); + + return err; + +out_unregister: + cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, + &l2cache_pmu->node); + return err; +} + +static int l2_cache_pmu_remove(struct platform_device *pdev) +{ + struct l2cache_pmu *l2cache_pmu = + to_l2cache_pmu(platform_get_drvdata(pdev)); + + perf_pmu_unregister(&l2cache_pmu->pmu); + cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, + &l2cache_pmu->node); + return 0; +} + +static struct platform_driver l2_cache_pmu_driver = { + .driver = { + .name = "qcom-l2cache-pmu", + .acpi_match_table = ACPI_PTR(l2_cache_pmu_acpi_match), + }, + .probe = l2_cache_pmu_probe, + .remove = l2_cache_pmu_remove, +}; + +static int __init register_l2_cache_pmu_driver(void) +{ + int err; + + err = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, + "AP_PERF_ARM_QCOM_L2_ONLINE", + l2cache_pmu_online_cpu, + l2cache_pmu_offline_cpu); + if (err) + return err; + + return platform_driver_register(&l2_cache_pmu_driver); +} +device_initcall(register_l2_cache_pmu_driver); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 20bfefbe7594..1b7b2075b9cd 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -138,6 +138,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_CCI_ONLINE, CPUHP_AP_PERF_ARM_CCN_ONLINE, CPUHP_AP_PERF_ARM_L2X0_ONLINE, + CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_ONLINE_DYN, From 76624175dcae6f7a808d345c0592908a15ca6975 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 7 Feb 2017 12:33:55 +0000 Subject: [PATCH 63/74] arm64: uaccess: consistently check object sizes Currently in arm64's copy_{to,from}_user, we only check the source/destination object size if access_ok() tells us the user access is permissible. However, in copy_from_user() we'll subsequently zero any remainder on the destination object. If we failed the access_ok() check, that applies to the whole object size, which we didn't check. To ensure that we catch that case, this patch hoists check_object_size() to the start of copy_from_user(), matching __copy_from_user() and __copy_to_user(). To make all of our uaccess copy primitives consistent, the same is done to copy_to_user(). Cc: Catalin Marinas Acked-by: Kees Cook Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 46da3ea638bb..5308d696311b 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -379,9 +379,9 @@ static inline unsigned long __must_check copy_from_user(void *to, const void __u { unsigned long res = n; kasan_check_write(to, n); + check_object_size(to, n, false); if (access_ok(VERIFY_READ, from, n)) { - check_object_size(to, n, false); res = __arch_copy_from_user(to, from, n); } if (unlikely(res)) @@ -392,9 +392,9 @@ static inline unsigned long __must_check copy_from_user(void *to, const void __u static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { kasan_check_read(from, n); + check_object_size(from, n, true); if (access_ok(VERIFY_WRITE, to, n)) { - check_object_size(from, n, true); n = __arch_copy_to_user(to, from, n); } return n; From ffe3d1e43cc01457d513bf6e13605ebe8ae0fd9a Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Thu, 9 Feb 2017 09:52:03 +0800 Subject: [PATCH 64/74] arm64: use linux/sizes.h for constants Use linux/size.h to improve code readability. Signed-off-by: Miles Chen Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 0b07d42819da..1ad48f93abdd 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -407,7 +407,7 @@ unsigned long arch_align_stack(unsigned long sp) unsigned long arch_randomize_brk(struct mm_struct *mm) { if (is_compat_task()) - return randomize_page(mm->brk, 0x02000000); + return randomize_page(mm->brk, SZ_32M); else - return randomize_page(mm->brk, 0x40000000); + return randomize_page(mm->brk, SZ_1G); } From abb7c61e03bcc241c4adcd634a839734d7c187df Mon Sep 17 00:00:00 2001 From: Miles Chen Date: Thu, 9 Feb 2017 19:45:34 +0800 Subject: [PATCH 65/74] arm64: use is_vmalloc_addr To is_vmalloc_addr() to check if an address is a vmalloc address instead of checking VMALLOC_START and VMALLOC_END manually. Signed-off-by: Miles Chen Signed-off-by: Will Deacon --- arch/arm64/mm/ioremap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 01e88c8bcab0..c4c8cd4c31d4 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -88,7 +88,7 @@ void __iounmap(volatile void __iomem *io_addr) * We could get an address outside vmalloc range in case * of ioremap_cache() reusing a RAM mapping. */ - if (VMALLOC_START <= addr && addr < VMALLOC_END) + if (is_vmalloc_addr((void *)addr)) vunmap((void *)addr); } EXPORT_SYMBOL(__iounmap); From 6e01398fe4505ac4ac963fe0ca44b25e46783fef Mon Sep 17 00:00:00 2001 From: Ding Tianhong Date: Thu, 9 Feb 2017 17:00:34 +0000 Subject: [PATCH 66/74] arm64: arch_timer: document Hisilicon erratum 161010101 Now that we have a workaround for Hisilicon erratum 161010101, notes this in the arm64 silicon-errata document. The new config option is too long to fit in the existing kconfig column, so this is widened to accomodate it. At the same time, an existing whitespace error is corrected, and the existing pattern of a line space between vendors is enforced for recent additions. Signed-off-by: Ding Tianhong [Mark: split patch, reword commit message, rework table] Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.txt | 47 ++++++++++++++------------ 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 0006a94c2321..dd7c792bfdc4 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -42,25 +42,28 @@ file acts as a registry of software workarounds in the Linux Kernel and will be updated when new workarounds are committed and backported to stable kernels. -| Implementor | Component | Erratum ID | Kconfig | -+----------------+-----------------+-----------------+-------------------------+ -| ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | -| ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 | -| ARM | Cortex-A53 | #824069 | ARM64_ERRATUM_824069 | -| ARM | Cortex-A53 | #819472 | ARM64_ERRATUM_819472 | -| ARM | Cortex-A53 | #845719 | ARM64_ERRATUM_845719 | -| ARM | Cortex-A53 | #843419 | ARM64_ERRATUM_843419 | -| ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 | -| ARM | Cortex-A57 | #852523 | N/A | -| ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 | -| ARM | Cortex-A72 | #853709 | N/A | -| ARM | MMU-500 | #841119,#826419 | N/A | -| | | | | -| Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | -| Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 | -| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | -| Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 | -| Cavium | ThunderX SMMUv2 | #27704 | N/A | -| | | | | -| Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | -| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009| +| Implementor | Component | Erratum ID | Kconfig | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | +| ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 | +| ARM | Cortex-A53 | #824069 | ARM64_ERRATUM_824069 | +| ARM | Cortex-A53 | #819472 | ARM64_ERRATUM_819472 | +| ARM | Cortex-A53 | #845719 | ARM64_ERRATUM_845719 | +| ARM | Cortex-A53 | #843419 | ARM64_ERRATUM_843419 | +| ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 | +| ARM | Cortex-A57 | #852523 | N/A | +| ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 | +| ARM | Cortex-A72 | #853709 | N/A | +| ARM | MMU-500 | #841119,#826419 | N/A | +| | | | | +| Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | +| Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 | +| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | +| Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 | +| Cavium | ThunderX SMMUv2 | #27704 | N/A | +| | | | | +| Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | +| | | | | +| Hisilicon | Hip0{5,6,7} | #161010101 | HISILICON_ERRATUM_161010101 | +| | | | | +| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | From 2bf47e194608192b7346baea1d57929a87ced3f9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 22 Sep 2016 11:25:25 +0100 Subject: [PATCH 67/74] arm64: head.S: Enable EL1 (host) access to SPE when entered at EL2 The SPE architecture requires each exception level to enable access to the SPE controls for the exception level below it, since additional context-switch logic may be required to handle the buffer safely. This patch allows EL1 (host) access to the SPE controls when entered at EL2. Acked-by: Mark Rutland Acked-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index c6cc82ec190b..4fb6ccd886d1 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -637,15 +637,26 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems #endif /* EL2 debug */ - mrs x0, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer - sbfx x0, x0, #8, #4 + mrs x1, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer + sbfx x0, x1, #8, #4 cmp x0, #1 b.lt 4f // Skip if no PMU present mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to 4: - csel x0, xzr, x0, lt // all PMU counters from EL1 - msr mdcr_el2, x0 // (if they exist) + csel x3, xzr, x0, lt // all PMU counters from EL1 + + /* Statistical profiling */ + ubfx x0, x1, #32, #4 // Check ID_AA64DFR0_EL1 PMSVer + cbz x0, 6f // Skip if SPE not present + cbnz x2, 5f // VHE? + mov x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) + orr x3, x3, x1 // If we don't have VHE, then + b 6f // use EL1&0 translation. +5: // For VHE, use EL2 translation + orr x3, x3, #MDCR_EL2_TPMS // and disable access from EL1 +6: + msr mdcr_el2, x3 // Configure debug traps /* Stage-2 translation */ msr vttbr_el2, xzr From 38fd94b0275c91071157a03cc27676909b23dcde Mon Sep 17 00:00:00 2001 From: Christopher Covington Date: Wed, 8 Feb 2017 15:08:37 -0500 Subject: [PATCH 68/74] arm64: Work around Falkor erratum 1003 The Qualcomm Datacenter Technologies Falkor v1 CPU may allocate TLB entries using an incorrect ASID when TTBRx_EL1 is being updated. When the erratum is triggered, page table entries using the new translation table base address (BADDR) will be allocated into the TLB using the old ASID. All circumstances leading to the incorrect ASID being cached in the TLB arise when software writes TTBRx_EL1[ASID] and TTBRx_EL1[BADDR], a memory operation is in the process of performing a translation using the specific TTBRx_EL1 being written, and the memory operation uses a translation table descriptor designated as non-global. EL2 and EL3 code changing the EL1&0 ASID is not subject to this erratum because hardware is prohibited from performing translations from an out-of-context translation regime. Consider the following pseudo code. write new BADDR and ASID values to TTBRx_EL1 Replacing the above sequence with the one below will ensure that no TLB entries with an incorrect ASID are used by software. write reserved value to TTBRx_EL1[ASID] ISB write new value to TTBRx_EL1[BADDR] ISB write new value to TTBRx_EL1[ASID] ISB When the above sequence is used, page table entries using the new BADDR value may still be incorrectly allocated into the TLB using the reserved ASID. Yet this will not reduce functionality, since TLB entries incorrectly tagged with the reserved ASID will never be hit by a later instruction. Based on work by Shanker Donthineni Reviewed-by: Catalin Marinas Signed-off-by: Christopher Covington Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.txt | 1 + arch/arm64/Kconfig | 18 ++++++++++++++++++ arch/arm64/include/asm/assembler.h | 23 +++++++++++++++++++++++ arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/include/asm/mmu_context.h | 8 +++++++- arch/arm64/kernel/cpu_errata.c | 9 +++++++++ arch/arm64/mm/context.c | 11 +++++++++++ arch/arm64/mm/proc.S | 1 + 8 files changed, 72 insertions(+), 2 deletions(-) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index dd7c792bfdc4..a71b8095dbd8 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -66,4 +66,5 @@ stable kernels. | | | | | | Hisilicon | Hip0{5,6,7} | #161010101 | HISILICON_ERRATUM_161010101 | | | | | | +| Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | | Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f45405664558..d2fe685b9026 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -480,6 +480,24 @@ config CAVIUM_ERRATUM_27456 If unsure, say Y. +config QCOM_FALKOR_ERRATUM_1003 + bool "Falkor E1003: Incorrect translation due to ASID change" + default y + select ARM64_PAN if ARM64_SW_TTBR0_PAN + help + On Falkor v1, an incorrect ASID may be cached in the TLB when ASID + and BADDR are changed together in TTBRx_EL1. The workaround for this + issue is to use a reserved ASID in cpu_do_switch_mm() before + switching to the new ASID. Saying Y here selects ARM64_PAN if + ARM64_SW_TTBR0_PAN is selected. This is done because implementing and + maintaining the E1003 workaround in the software PAN emulation code + would be an unnecessary complication. The affected Falkor v1 CPU + implements ARMv8.1 hardware PAN support and using hardware PAN + support versus software PAN emulation is mutually exclusive at + runtime. + + If unsure, say Y. + config QCOM_FALKOR_ERRATUM_1009 bool "Falkor E1009: Prematurely complete a DSB after a TLBI" default y diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 446f6c46d4b1..33b20c075fb3 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -422,6 +423,28 @@ alternative_endif mrs \rd, sp_el0 .endm +/* + * Errata workaround prior to TTBR0_EL1 update + * + * val: TTBR value with new BADDR, preserved + * tmp0: temporary register, clobbered + * tmp1: other temporary register, clobbered + */ + .macro pre_ttbr0_update_workaround, val, tmp0, tmp1 +#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 +alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003 + mrs \tmp0, ttbr0_el1 + mov \tmp1, #FALKOR_RESERVED_ASID + bfi \tmp0, \tmp1, #48, #16 // reserved ASID + old BADDR + msr ttbr0_el1, \tmp0 + isb + bfi \tmp0, \val, #0, #48 // reserved ASID + new BADDR + msr ttbr0_el1, \tmp0 + isb +alternative_else_nop_endif +#endif + .endm + /* * Errata workaround post TTBR0_EL1 update. */ diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index d1207ac696ac..fb78a5d3b60b 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -36,7 +36,8 @@ #define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 #define ARM64_HAS_NO_FPSIMD 16 #define ARM64_WORKAROUND_REPEAT_TLBI 17 +#define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18 -#define ARM64_NCAPS 18 +#define ARM64_NCAPS 19 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 63e9982daca1..1ef40d82cfd3 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -19,6 +19,10 @@ #ifndef __ASM_MMU_CONTEXT_H #define __ASM_MMU_CONTEXT_H +#define FALKOR_RESERVED_ASID 1 + +#ifndef __ASSEMBLY__ + #include #include @@ -220,4 +224,6 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, void verify_cpu_asid_bits(void); -#endif +#endif /* !__ASSEMBLY__ */ + +#endif /* !__ASM_MMU_CONTEXT_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 32b9beda2ac8..f6cc67e7626e 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -133,6 +133,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .def_scope = SCOPE_LOCAL_CPU, .enable = cpu_enable_trap_ctr_access, }, +#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 + { + .desc = "Qualcomm Technologies Falkor erratum 1003", + .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, + MIDR_RANGE(MIDR_QCOM_FALKOR_V1, + MIDR_CPU_VAR_REV(0, 0), + MIDR_CPU_VAR_REV(0, 0)), + }, +#endif #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009 { .desc = "Qualcomm Technologies Falkor erratum 1009", diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 4c63cb154859..68634c630cdd 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -79,6 +79,13 @@ void verify_cpu_asid_bits(void) } } +static void set_reserved_asid_bits(void) +{ + if (IS_ENABLED(CONFIG_QCOM_FALKOR_ERRATUM_1003) && + cpus_have_const_cap(ARM64_WORKAROUND_QCOM_FALKOR_E1003)) + __set_bit(FALKOR_RESERVED_ASID, asid_map); +} + static void flush_context(unsigned int cpu) { int i; @@ -87,6 +94,8 @@ static void flush_context(unsigned int cpu) /* Update the list of reserved ASIDs and the ASID bitmap. */ bitmap_clear(asid_map, 0, NUM_USER_ASIDS); + set_reserved_asid_bits(); + /* * Ensure the generation bump is observed before we xchg the * active_asids. @@ -244,6 +253,8 @@ static int asids_init(void) panic("Failed to allocate bitmap for %lu ASIDs\n", NUM_USER_ASIDS); + set_reserved_asid_bits(); + pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS); return 0; } diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 32682be978e0..cd4d53d7e458 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -138,6 +138,7 @@ ENDPROC(cpu_do_resume) * - pgd_phys - physical address of new TTB */ ENTRY(cpu_do_switch_mm) + pre_ttbr0_update_workaround x0, x1, x2 mmid x1, x1 // get mm->context.id bfi x0, x1, #48, #16 // set the ASID msr ttbr0_el1, x0 // set TTBR0 From 12f043ff2b28fa64c9123b454cbe30a8a9e1967e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Feb 2017 22:27:01 +0100 Subject: [PATCH 69/74] arm64: fix warning about swapper_pg_dir overflow With 4 levels of 16KB pages, we get this warning about the fact that we are copying a whole page into an array that is declared as having only two pointers for the top level of the page table: arch/arm64/mm/mmu.c: In function 'paging_init': arch/arm64/mm/mmu.c:528:2: error: 'memcpy' writing 16384 bytes into a region of size 16 overflows the destination [-Werror=stringop-overflow=] This is harmless since we actually reserve a whole page in the definition of the array that comes from, and just the extern declaration is short. The pgdir is initialized to zero either way, so copying the actual entries here seems like the best solution. Acked-by: Ard Biesheuvel Acked-by: Mark Rutland Signed-off-by: Arnd Bergmann Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 2131521ddc24..b805c017f789 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -525,7 +525,7 @@ void __init paging_init(void) * To do this we need to go via a temporary pgd. */ cpu_replace_ttbr1(__va(pgd_phys)); - memcpy(swapper_pg_dir, pgd, PAGE_SIZE); + memcpy(swapper_pg_dir, pgd, PGD_SIZE); cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); pgd_clear_fixmap(); From f705d954634d161db5923fc86cacf0f9e301f73a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Feb 2017 22:32:58 +0100 Subject: [PATCH 70/74] arm64: include asm/assembler.h in entry-ftrace.S In a randconfig build I ran into this build error: arch/arm64/kernel/entry-ftrace.S: Assembler messages: arch/arm64/kernel/entry-ftrace.S:101: Error: unknown mnemonic `ldr_l' -- `ldr_l x2,ftrace_trace_function' The macro is defined in asm/assembler.h, so we should include that file. Fixes: 829d2bd13392 ("arm64: entry-ftrace.S: avoid open-coded {adr,ldr}_l") Acked-by: Mark Rutland Signed-off-by: Arnd Bergmann Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-ftrace.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 8f1905b795f3..e1be42e11ff5 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -10,6 +10,7 @@ */ #include +#include #include #include From 6c23e2ff7013be2c4bbcb7b9b3cc27c763348223 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 9 Feb 2017 15:19:18 +0000 Subject: [PATCH 71/74] arm64: ptrace: add XZR-safe regs accessors In A64, XZR and the SP share the same encoding (31), and whether an instruction accesses XZR or SP for a particular register parameter depends on the definition of the instruction. We store the SP in pt_regs::regs[31], and thus when emulating instructions, we must be careful to not erroneously read from or write back to the saved SP. Unfortunately, we often fail to be this careful. In all cases, instructions using a transfer register parameter Xt use this to refer to XZR rather than SP. This patch adds helpers so that we can more easily and consistently handle these cases. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/ptrace.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 513daf050e84..11403fdd0a50 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -194,6 +194,26 @@ static inline u64 regs_get_register(struct pt_regs *regs, unsigned int offset) return val; } +/* + * Read a register given an architectural register index r. + * This handles the common case where 31 means XZR, not SP. + */ +static inline unsigned long pt_regs_read_reg(const struct pt_regs *regs, int r) +{ + return (r == 31) ? 0 : regs->regs[r]; +} + +/* + * Write a register given an architectural register index r. + * This handles the common case where 31 means XZR, not SP. + */ +static inline void pt_regs_write_reg(struct pt_regs *regs, int r, + unsigned long val) +{ + if (r != 31) + regs->regs[r] = val; +} + /* Valid only for Kernel mode traps. */ static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) { From 8b6e70fccff27121430114b4507f0adfb790752f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 9 Feb 2017 15:19:19 +0000 Subject: [PATCH 72/74] arm64: traps: correctly handle MRS/MSR with XZR Currently we hand-roll XZR-safe register handling in user_cache_maint_handler(), though we forget to do the same in ctr_read_handler(), and may erroneously write back to the user SP rather than XZR. Use the new helpers to handle these cases correctly and consistently. Signed-off-by: Mark Rutland Fixes: 116c81f427ff6c53 ("arm64: Work around systems with mismatched cache line sizes") Cc: Andre Przywara Cc: Catalin Marinas Cc: Marc Zyngier Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 7c3fc0634aa2..350179becdf7 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -466,7 +466,7 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; int ret = 0; - address = (rt == 31) ? 0 : regs->regs[rt]; + address = pt_regs_read_reg(regs, rt); switch (crm) { case ESR_ELx_SYS64_ISS_CRM_DC_CVAU: /* DC CVAU, gets promoted */ @@ -495,8 +495,10 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) { int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT; + unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); + + pt_regs_write_reg(regs, rt, val); - regs->regs[rt] = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); regs->pc += 4; } From 521c646108ed199d19c5c73978aaca3e18ca8f81 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 9 Feb 2017 15:19:20 +0000 Subject: [PATCH 73/74] arm64: cpufeature: correctly handle MRS to XZR In emulate_mrs() we may erroneously write back to the user SP rather than XZR if we trap an MRS instruction where Xt == 31. Use the new pt_regs_write_reg() helper to handle this correctly. Signed-off-by: Mark Rutland Fixes: 77c97b4ee21290f5 ("arm64: cpufeature: Expose CPUID registers by emulation") Cc: Andre Przywara Cc: Catalin Marinas Cc: Marc Zyngier Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 1ee5357d0c6a..abda8e861865 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1214,7 +1214,7 @@ static int emulate_mrs(struct pt_regs *regs, u32 insn) rc = emulate_sys_reg(sys_reg, &val); if (!rc) { dst = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); - regs->user_regs.regs[dst] = val; + pt_regs_write_reg(regs, dst, val); regs->pc += 4; } From ffe7afd1713558d73483834c2e2d03a1e39a4062 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 9 Feb 2017 15:19:21 +0000 Subject: [PATCH 74/74] arm64/kprobes: consistently handle MRS/MSR with XZR Now that we have XZR-safe helpers for fiddling with registers, use these in the arm64 kprobes code rather than open-coding the logic. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/probes/simulate-insn.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c index 357d3efe1366..be05868418ee 100644 --- a/arch/arm64/kernel/probes/simulate-insn.c +++ b/arch/arm64/kernel/probes/simulate-insn.c @@ -17,6 +17,8 @@ #include #include +#include + #include "simulate-insn.h" #define bbl_displacement(insn) \ @@ -36,30 +38,22 @@ static inline void set_x_reg(struct pt_regs *regs, int reg, u64 val) { - if (reg < 31) - regs->regs[reg] = val; + pt_regs_write_reg(regs, reg, val); } static inline void set_w_reg(struct pt_regs *regs, int reg, u64 val) { - if (reg < 31) - regs->regs[reg] = lower_32_bits(val); + pt_regs_write_reg(regs, reg, lower_32_bits(val)); } static inline u64 get_x_reg(struct pt_regs *regs, int reg) { - if (reg < 31) - return regs->regs[reg]; - else - return 0; + return pt_regs_read_reg(regs, reg); } static inline u32 get_w_reg(struct pt_regs *regs, int reg) { - if (reg < 31) - return lower_32_bits(regs->regs[reg]); - else - return 0; + return lower_32_bits(pt_regs_read_reg(regs, reg)); } static bool __kprobes check_cbz(u32 opcode, struct pt_regs *regs)