arm64 fixes for 4.3-rc2

A mixture of fixes for regressions introduced during the merge window, some longer standing problems that we spotted and a couple of hardware errata. The main changes are: - Fix fallout from the h/w DBM patches, causing filesystem writeback issues on both v8 and v8.1 CPUs - Workaround for Cortex-A53 erratum #843419 in the module loader - Fix for long-standing issue with compat big-endian signal handlers using the saved floating point state -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQIcBAABCgAGBQJV+pz5AAoJEC379FI+VC/ZhlIP/26twmE0COohnFmJbYLqipF4 fX3kLZVpW9yYqSbP/pZyY8T+7B838Ke1JuUisJEWQzsHBrdvv0YmXAb1SaPm1B6V DMDXAPgsjOugmW8BHvVui/zau8VfShkXIfr4E6bHQbo6wQCzVbOTMqotXzsM+kkm vODvhSsaTUWH9DNTG+euP4FZBUYKQjHy/ODY3tyQ65Nm/zTA7J3ZYKWM5HSlzt2x 4DXFTsxNpIHsflUo/H7LpG/S/DuEE3eIQyMDU3zUuE0wjAR7ukBfKGd8XVMccXUq ZR1rUH6NMSUkoiZp6zjck+6FFJ2f3As19wQlmY4+CSQBD7T0Ve6pCGueZV8F3OUG aX+bth3xCz6lP7XVXV6IChuIs9kSnEmGH28q1rPIjWiOI3yMGG218TWxP3qLZAAF aL0G95WmFpVii1PkgoELBHkfi5WQOaoQwKFNErP4SkuGNFlOKGF1sIq3AqgyypW5 oY1oWZ3DHDidWl0rUjanSCbDdY+wl8/hB0XiWtBolEnw79ciNOU2rPbTrsER+ilL Lrxp0PHb4UaZvKUWa8SQwqOkuuAIVe53wNZovizISCOomYR7LouMrA/8z4mQPieL 8jrb0R5l3H3sO3umpKvA3GrZIs5Po7QjLhJ8EnFvtba2Mk5JzNH4uHRdFvGBAG00 dbHd9rJ3wHlBhaYahVjS =Bc04 -----END PGP SIGNATURE----- Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux Pull arm64 fixes from Will Deacon: "This addresses some problems with filesystem writeback due to the recently merged hardware DBM patches, which caused us to treat some read-only pages as dirty. There are also some other, less significant fixes that are described in the summary below: A mixture of fixes for regressions introduced during the merge window, some longer standing problems that we spotted and a couple of hardware errata. The main changes are: - Fix fallout from the h/w DBM patches, causing filesystem writeback issues on both v8 and v8.1 CPUs - Workaround for Cortex-A53 erratum #843419 in the module loader - Fix for long-standing issue with compat big-endian signal handlers using the saved floating point state" * tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: arm64: errata: add module build workaround for erratum #843419 arm64: compat: fix vfp save/restore across signal handlers in big-endian arm64: cpu hotplug: ensure we mask out CPU_TASKS_FROZEN in notifiers arm64: head.S: initialise mdcr_el2 in el2_setup arm64: enable generic idle loop arm64: pgtable: use a single bit for PTE_WRITE regardless of DBM arm64: Fix pte_modify() to preserve the hardware dirty information arm64: Fix the pte_hw_dirty() check when AF/DBM is enabled arm64: dma-mapping: check whether cma area is initialized or not
2025-01-07 18:14:04 +00:00 · 2015-09-17 11:28:17 -07:00 · 2015-09-17 11:28:17 -07:00 · d109c4bb45
commit d109c4bb45
parent 42dc2a3048 df057cc7b4
9 changed files with 71 additions and 22 deletions
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@ -32,6 +32,7 @@ config ARM64
 	select GENERIC_CLOCKEVENTS_BROADCAST
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_EARLY_IOREMAP
+	select GENERIC_IDLE_POLL_SETUP
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IRQ_SHOW_LEVEL
@ -331,6 +332,22 @@ config ARM64_ERRATUM_845719

 	  If unsure, say Y.

+config ARM64_ERRATUM_843419
+	bool "Cortex-A53: 843419: A load or store might access an incorrect address"
+	depends on MODULES
+	default y
+	help
+	  This option builds kernel modules using the large memory model in
+	  order to avoid the use of the ADRP instruction, which can cause
+	  a subsequent memory access to use an incorrect address on Cortex-A53
+	  parts up to r0p4.
+
+	  Note that the kernel itself must be linked with a version of ld
+	  which fixes potentially affected ADRP instructions through the
+	  use of veneers.
+
+	  If unsure, say Y.
+
 endmenu


--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@ -41,6 +41,10 @@ endif

 CHECKFLAGS	+= -D__aarch64__

+ifeq ($(CONFIG_ARM64_ERRATUM_843419), y)
+CFLAGS_MODULE	+= -mcmodel=large
+endif
+
 # Default value
 head-y		:= arch/arm64/kernel/head.o

--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@ -26,13 +26,9 @@
 * Software defined PTE bits definition.
 */
 #define PTE_VALID		(_AT(pteval_t, 1) << 0)
+#define PTE_WRITE		(PTE_DBM)		 /* same as DBM (51) */
 #define PTE_DIRTY		(_AT(pteval_t, 1) << 55)
 #define PTE_SPECIAL		(_AT(pteval_t, 1) << 56)
-#ifdef CONFIG_ARM64_HW_AFDBM
-#define PTE_WRITE		(PTE_DBM)		 /* same as DBM */
-#else
-#define PTE_WRITE		(_AT(pteval_t, 1) << 57)
-#endif
 #define PTE_PROT_NONE		(_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */

 /*
@ -146,7 +142,7 @@ extern struct page *empty_zero_page;
 #define pte_exec(pte)		(!(pte_val(pte) & PTE_UXN))

 #ifdef CONFIG_ARM64_HW_AFDBM
-#define pte_hw_dirty(pte)	(!(pte_val(pte) & PTE_RDONLY))
+#define pte_hw_dirty(pte)	(pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
 #else
 #define pte_hw_dirty(pte)	(0)
 #endif
@ -238,7 +234,7 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 * When hardware DBM is not present, the sofware PTE_DIRTY bit is updated via
 * the page fault mechanism. Checking the dirty status of a pte becomes:
 *
- *   PTE_DIRTY || !PTE_RDONLY
+ *   PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY)
 */
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pte)
@ -503,7 +499,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 			      PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK;
 	/* preserve the hardware dirty information */
 	if (pte_hw_dirty(pte))
-		newprot |= PTE_DIRTY;
+		pte = pte_mkdirty(pte);
 	pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
 	return pte;
 }
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@ -134,7 +134,7 @@ static int os_lock_notify(struct notifier_block *self,
 				    unsigned long action, void *data)
 {
 	int cpu = (unsigned long)data;
-	if (action == CPU_ONLINE)
+	if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
 		smp_call_function_single(cpu, clear_os_lock, NULL, 1);
 	return NOTIFY_OK;
 }
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@ -523,6 +523,11 @@ CPU_LE(	movk	x0, #0x30d0, lsl #16	)	// Clear EE and E0E on LE systems
 	msr	hstr_el2, xzr			// Disable CP15 traps to EL2
 #endif

+	/* EL2 debug */
+	mrs	x0, pmcr_el0			// Disable debug access traps
+	ubfx	x0, x0, #11, #5			// to EL2 and allow access to
+	msr	mdcr_el2, x0			// all PMU counters from EL1
+
 	/* Stage-2 translation */
 	msr	vttbr_el2, xzr

--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@ -872,7 +872,7 @@ static int hw_breakpoint_reset_notify(struct notifier_block *self,
 						void *hcpu)
 {
 	int cpu = (long)hcpu;
-	if (action == CPU_ONLINE)
+	if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
 		smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1);
 	return NOTIFY_OK;
 }
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@ -332,12 +332,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21,
 					     AARCH64_INSN_IMM_ADR);
 			break;
+#ifndef CONFIG_ARM64_ERRATUM_843419
 		case R_AARCH64_ADR_PREL_PG_HI21_NC:
 			overflow_check = false;
 		case R_AARCH64_ADR_PREL_PG_HI21:
 			ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21,
 					     AARCH64_INSN_IMM_ADR);
 			break;
+#endif
 		case R_AARCH64_ADD_ABS_LO12_NC:
 		case R_AARCH64_LDST8_ABS_LO12_NC:
 			overflow_check = false;
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@ -212,14 +212,32 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)

 /*
 * VFP save/restore code.
+ *
+ * We have to be careful with endianness, since the fpsimd context-switch
+ * code operates on 128-bit (Q) register values whereas the compat ABI
+ * uses an array of 64-bit (D) registers. Consequently, we need to swap
+ * the two halves of each Q register when running on a big-endian CPU.
 */
+union __fpsimd_vreg {
+	__uint128_t	raw;
+	struct {
+#ifdef __AARCH64EB__
+		u64	hi;
+		u64	lo;
+#else
+		u64	lo;
+		u64	hi;
+#endif
+	};
+};
+
 static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
 {
 	struct fpsimd_state *fpsimd = &current->thread.fpsimd_state;
 	compat_ulong_t magic = VFP_MAGIC;
 	compat_ulong_t size = VFP_STORAGE_SIZE;
 	compat_ulong_t fpscr, fpexc;
-	int err = 0;
+	int i, err = 0;

 	/*
 	 * Save the hardware registers to the fpsimd_state structure.
@ -235,10 +253,15 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
 	/*
 	 * Now copy the FP registers. Since the registers are packed,
 	 * we can copy the prefix we want (V0-V15) as it is.
-	 * FIXME: Won't work if big endian.
 	 */
-	err |= __copy_to_user(&frame->ufp.fpregs, fpsimd->vregs,
-			      sizeof(frame->ufp.fpregs));
+	for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) {
+		union __fpsimd_vreg vreg = {
+			.raw = fpsimd->vregs[i >> 1],
+		};
+
+		__put_user_error(vreg.lo, &frame->ufp.fpregs[i], err);
+		__put_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err);
+	}

 	/* Create an AArch32 fpscr from the fpsr and the fpcr. */
 	fpscr = (fpsimd->fpsr & VFP_FPSCR_STAT_MASK) |
@ -263,7 +286,7 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
 	compat_ulong_t magic = VFP_MAGIC;
 	compat_ulong_t size = VFP_STORAGE_SIZE;
 	compat_ulong_t fpscr;
-	int err = 0;
+	int i, err = 0;

 	__get_user_error(magic, &frame->magic, err);
 	__get_user_error(size, &frame->size, err);
@ -273,12 +296,14 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
 	if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE)
 		return -EINVAL;

-	/*
-	 * Copy the FP registers into the start of the fpsimd_state.
-	 * FIXME: Won't work if big endian.
-	 */
-	err |= __copy_from_user(fpsimd.vregs, frame->ufp.fpregs,
-				sizeof(frame->ufp.fpregs));
+	/* Copy the FP registers into the start of the fpsimd_state. */
+	for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) {
+		union __fpsimd_vreg vreg;
+
+		__get_user_error(vreg.lo, &frame->ufp.fpregs[i], err);
+		__get_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err);
+		fpsimd.vregs[i >> 1] = vreg.raw;
+	}

 	/* Extract the fpsr and the fpcr from the fpscr */
 	__get_user_error(fpscr, &frame->ufp.fpscr, err);
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@ -100,7 +100,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
 	if (IS_ENABLED(CONFIG_ZONE_DMA) &&
 	    dev->coherent_dma_mask <= DMA_BIT_MASK(32))
 		flags |= GFP_DMA;
-	if (IS_ENABLED(CONFIG_DMA_CMA) && (flags & __GFP_WAIT)) {
+	if (dev_get_cma_area(dev) && (flags & __GFP_WAIT)) {
 		struct page *page;
 		void *addr;