From 22048c5485503749754b3b5daf9d99ef89fcacdc Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Sat, 4 Mar 2017 15:42:48 -0500
Subject: [PATCH 001/410] tools/power turbostat: bugfix: GFXMHz column not
 changing

turbostat displays a GFXMHz column, which comes from reading
/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz

But GFXMHz was not changing, even when a manual
cat /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
showed a new value.

It turns out that a rewind() on the open file is not sufficient,
fflush() (or a close/open) is needed to read fresh values.

Reported-by: Yaroslav Isakov <yaroslav.isakov@gmail.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 828dccd3f01e..d7fb6bcb2744 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2485,8 +2485,10 @@ int snapshot_gfx_mhz(void)
 
 	if (fp == NULL)
 		fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
-	else
+	else {
 		rewind(fp);
+		fflush(fp);
+	}
 
 	retval = fscanf(fp, "%d", &gfx_cur_mhz);
 	if (retval != 1)

From 2e6c7747730296a6d4fd700894286db1132598c4 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Thu, 16 Feb 2017 12:39:01 +0000
Subject: [PATCH 002/410] MIPS: Force o32 fp64 support on 32bit MIPS64r6
 kernels

When a 32-bit kernel is configured to support MIPS64r6 (CPU_MIPS64_R6),
MIPS_O32_FP64_SUPPORT won't be selected as it should be because
MIPS32_O32 is disabled (o32 is already the default ABI available on
32-bit kernels).

This results in userland FP breakage as CP0_Status.FR is read-only 1
since r6 (when an FPU is present) so __enable_fpu() will fail to clear
FR. This causes the FPU emulator to get used which will incorrectly
emulate 32-bit FPU registers.

Force o32 fp64 support in this case by also selecting
MIPS_O32_FP64_SUPPORT from CPU_MIPS64_R6 if 32BIT.

Fixes: 4e9d324d4288 ("MIPS: Require O32 FP64 support for MIPS64 with O32 compat")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Reviewed-by: Paul Burton <paul.burton@imgtec.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Cc: <stable@vger.kernel.org> # 4.0.x-
Patchwork: https://patchwork.linux-mips.org/patch/15310/
Signed-off-by: James Hogan <james.hogan@imgtec.com>
---
 arch/mips/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index a008a9f03072..e0bb576410bb 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1531,7 +1531,7 @@ config CPU_MIPS64_R6
 	select CPU_SUPPORTS_HIGHMEM
 	select CPU_SUPPORTS_MSA
 	select GENERIC_CSUM
-	select MIPS_O32_FP64_SUPPORT if MIPS32_O32
+	select MIPS_O32_FP64_SUPPORT if 32BIT || MIPS32_O32
 	select HAVE_KVM
 	help
 	  Choose this option to build a kernel for release 6 or later of the

From 4b5347a24a0f2d3272032c120664b484478455de Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Thu, 23 Feb 2017 14:50:24 +0000
Subject: [PATCH 003/410] MIPS: End spinlocks with .insn

When building for microMIPS we need to ensure that the assembler always
knows that there is code at the target of a branch or jump. Recent
toolchains will fail to link a microMIPS kernel when this isn't the case
due to what it thinks is a branch to non-microMIPS code.

mips-mti-linux-gnu-ld kernel/built-in.o: .spinlock.text+0x2fc: Unsupported branch between ISA modes.
mips-mti-linux-gnu-ld final link failed: Bad value

This is due to inline assembly labels in spinlock.h not being followed
by an instruction mnemonic, either due to a .subsection pseudo-op or the
end of the inline asm block.

Fix this with a .insn direction after such labels.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Reviewed-by: Maciej W. Rozycki <macro@imgtec.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Cc: <stable@vger.kernel.org>
Patchwork: https://patchwork.linux-mips.org/patch/15325/
Signed-off-by: James Hogan <james.hogan@imgtec.com>
---
 arch/mips/include/asm/spinlock.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index f485afe51514..a8df44d60607 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -127,7 +127,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 		"	andi	%[ticket], %[ticket], 0xffff		\n"
 		"	bne	%[ticket], %[my_ticket], 4f		\n"
 		"	 subu	%[ticket], %[my_ticket], %[ticket]	\n"
-		"2:							\n"
+		"2:	.insn						\n"
 		"	.subsection 2					\n"
 		"4:	andi	%[ticket], %[ticket], 0xffff		\n"
 		"	sll	%[ticket], 5				\n"
@@ -202,7 +202,7 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
 		"	sc	%[ticket], %[ticket_ptr]		\n"
 		"	beqz	%[ticket], 1b				\n"
 		"	 li	%[ticket], 1				\n"
-		"2:							\n"
+		"2:	.insn						\n"
 		"	.subsection 2					\n"
 		"3:	b	2b					\n"
 		"	 li	%[ticket], 0				\n"
@@ -382,7 +382,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 		"	.set	reorder					\n"
 		__WEAK_LLSC_MB
 		"	li	%2, 1					\n"
-		"2:							\n"
+		"2:	.insn						\n"
 		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret)
 		: GCC_OFF_SMALL_ASM() (rw->lock)
 		: "memory");
@@ -422,7 +422,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 			"	lui	%1, 0x8000			\n"
 			"	sc	%1, %0				\n"
 			"	li	%2, 1				\n"
-			"2:						\n"
+			"2:	.insn					\n"
 			: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp),
 			  "=&r" (ret)
 			: GCC_OFF_SMALL_ASM() (rw->lock)

From 7c5a3d813050ee235817b0220dd8c42359a9efd8 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Sat, 25 Feb 2017 11:54:23 +0100
Subject: [PATCH 004/410] MIPS: ralink: Fix typos in rt3883 pinctrl

There are two copy & paste errors in the definition of the 5GHz LNA and
second ethernet pinmux.

Fixes: f576fb6a0700 ("MIPS: ralink: cleanup the soc specific pinmux data")
Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Cc: linux-mips@linux-mips.org
Cc: <stable@vger.kernel.org> # 3.19.x-
Patchwork: https://patchwork.linux-mips.org/patch/15328/
Signed-off-by: James Hogan <james.hogan@imgtec.com>
---
 arch/mips/ralink/rt3883.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/mips/ralink/rt3883.c b/arch/mips/ralink/rt3883.c
index c4ffd43d3996..48ce701557a4 100644
--- a/arch/mips/ralink/rt3883.c
+++ b/arch/mips/ralink/rt3883.c
@@ -35,7 +35,7 @@ static struct rt2880_pmx_func uartlite_func[] = { FUNC("uartlite", 0, 15, 2) };
 static struct rt2880_pmx_func jtag_func[] = { FUNC("jtag", 0, 17, 5) };
 static struct rt2880_pmx_func mdio_func[] = { FUNC("mdio", 0, 22, 2) };
 static struct rt2880_pmx_func lna_a_func[] = { FUNC("lna a", 0, 32, 3) };
-static struct rt2880_pmx_func lna_g_func[] = { FUNC("lna a", 0, 35, 3) };
+static struct rt2880_pmx_func lna_g_func[] = { FUNC("lna g", 0, 35, 3) };
 static struct rt2880_pmx_func pci_func[] = {
 	FUNC("pci-dev", 0, 40, 32),
 	FUNC("pci-host2", 1, 40, 32),
@@ -43,7 +43,7 @@ static struct rt2880_pmx_func pci_func[] = {
 	FUNC("pci-fnc", 3, 40, 32)
 };
 static struct rt2880_pmx_func ge1_func[] = { FUNC("ge1", 0, 72, 12) };
-static struct rt2880_pmx_func ge2_func[] = { FUNC("ge1", 0, 84, 12) };
+static struct rt2880_pmx_func ge2_func[] = { FUNC("ge2", 0, 84, 12) };
 
 static struct rt2880_pmx_group rt3883_pinmux_data[] = {
 	GRP("i2c", i2c_func, 1, RT3883_GPIO_MODE_I2C),

From 0c7e2bc87ea6c2eb6f369998f74a0278e64863e4 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Sat, 4 Mar 2017 00:32:03 +0000
Subject: [PATCH 005/410] MIPS: Include asm/ptrace.h now linux/sched.h doesn't

Use of the task_pt_regs() based macros in MIPS' asm/processor.h for
accessing the user context on the kernel stack need the definition of
struct pt_regs from asm/ptrace.h. __own_fpu() in asm/fpu.h uses these
macros but implicitly depended on linux/sched.h to include asm/ptrace.h.

Since commit f780d89a0e82 ("sched/headers: Remove <asm/ptrace.h> from
<linux/sched.h>") however linux/sched.h no longer includes asm/ptrace.h,
so include it explicitly from asm/fpu.h where it is needed instead.

This fixes build errors such as:

./arch/mips/include/asm/fpu.h: In function '__own_fpu':
./arch/mips/include/asm/processor.h:385:31: error: invalid application of 'sizeof' to incomplete type 'struct pt_regs'
     THREAD_SIZE - 32 - sizeof(struct pt_regs))
                               ^

Fixes: f780d89a0e82 ("sched/headers: Remove <asm/ptrace.h> from <linux/sched.h>")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15386/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/fpu.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index 321752bcbab6..1527efaf4af4 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -20,6 +20,7 @@
 #include <asm/cpu-features.h>
 #include <asm/fpu_emulator.h>
 #include <asm/hazards.h>
+#include <asm/ptrace.h>
 #include <asm/processor.h>
 #include <asm/current.h>
 #include <asm/msa.h>

From 9cb74b5e134c9f133001dd1585deef5353cd85f1 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Sat, 4 Mar 2017 00:41:25 +0000
Subject: [PATCH 006/410] MIPS: Wire up statx system call

Wire up the statx system call for MIPS, which was introduced in commit
a528d35e8bfc ("statx: Add a system call to make enhanced file info
available").

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15387/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/uapi/asm/unistd.h | 15 +++++++++------
 arch/mips/kernel/scall32-o32.S      |  1 +
 arch/mips/kernel/scall64-64.S       |  1 +
 arch/mips/kernel/scall64-n32.S      |  1 +
 arch/mips/kernel/scall64-o32.S      |  1 +
 5 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h
index 3e940dbe0262..78faf4292e90 100644
--- a/arch/mips/include/uapi/asm/unistd.h
+++ b/arch/mips/include/uapi/asm/unistd.h
@@ -386,17 +386,18 @@
 #define __NR_pkey_mprotect		(__NR_Linux + 363)
 #define __NR_pkey_alloc			(__NR_Linux + 364)
 #define __NR_pkey_free			(__NR_Linux + 365)
+#define __NR_statx			(__NR_Linux + 366)
 
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls		365
+#define __NR_Linux_syscalls		366
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux			4000
-#define __NR_O32_Linux_syscalls		365
+#define __NR_O32_Linux_syscalls		366
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
@@ -730,16 +731,17 @@
 #define __NR_pkey_mprotect		(__NR_Linux + 323)
 #define __NR_pkey_alloc			(__NR_Linux + 324)
 #define __NR_pkey_free			(__NR_Linux + 325)
+#define __NR_statx			(__NR_Linux + 326)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls		325
+#define __NR_Linux_syscalls		326
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux			5000
-#define __NR_64_Linux_syscalls		325
+#define __NR_64_Linux_syscalls		326
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
@@ -1077,15 +1079,16 @@
 #define __NR_pkey_mprotect		(__NR_Linux + 327)
 #define __NR_pkey_alloc			(__NR_Linux + 328)
 #define __NR_pkey_free			(__NR_Linux + 329)
+#define __NR_statx			(__NR_Linux + 330)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls		329
+#define __NR_Linux_syscalls		330
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux			6000
-#define __NR_N32_Linux_syscalls		329
+#define __NR_N32_Linux_syscalls		330
 
 #endif /* _UAPI_ASM_UNISTD_H */
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index c29d397eee86..80ed68b2c95e 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -600,3 +600,4 @@ EXPORT(sys_call_table)
 	PTR	sys_pkey_mprotect
 	PTR	sys_pkey_alloc
 	PTR	sys_pkey_free			/* 4365 */
+	PTR	sys_statx
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 0687f96ee912..49765b44aa9b 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -438,4 +438,5 @@ EXPORT(sys_call_table)
 	PTR	sys_pkey_mprotect
 	PTR	sys_pkey_alloc
 	PTR	sys_pkey_free			/* 5325 */
+	PTR	sys_statx
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 0331ba39a065..90bad2d1b2d3 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -433,4 +433,5 @@ EXPORT(sysn32_call_table)
 	PTR	sys_pkey_mprotect
 	PTR	sys_pkey_alloc
 	PTR	sys_pkey_free
+	PTR	sys_statx			/* 6330 */
 	.size	sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 5a47042dd25f..2dd70bd104e1 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -588,4 +588,5 @@ EXPORT(sys32_call_table)
 	PTR	sys_pkey_mprotect
 	PTR	sys_pkey_alloc
 	PTR	sys_pkey_free			/* 4365 */
+	PTR	sys_statx
 	.size	sys32_call_table,.-sys32_call_table

From 8b38f890eff6efc153130254a96452570f971159 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Fri, 10 Mar 2017 10:41:50 +0900
Subject: [PATCH 007/410] MAINTAINERS: add Masahiro Yamada as a Kbuild
 maintainer

It has been difficult lately for Michal to work on Kbuild on his
regular basis.  We discussed the maintainership of Kbuild, and I
decided to be a co-maintainer.

Add myself to the maintainer field, and replace the repository with
my own.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Michal Marek <mmarek@suse.com>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index c265a5fe4848..f1023c52b1e3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7084,9 +7084,9 @@ S:	Maintained
 F:	fs/autofs4/
 
 KERNEL BUILD + files below scripts/ (unless maintained elsewhere)
+M:	Masahiro Yamada <yamada.masahiro@socionext.com>
 M:	Michal Marek <mmarek@suse.com>
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git for-next
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git rc-fixes
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git
 L:	linux-kbuild@vger.kernel.org
 S:	Maintained
 F:	Documentation/kbuild/

From 4607ebf04b8190d2c7aa5504959e9fddfc0cd736 Mon Sep 17 00:00:00 2001
From: "Allan, Bruce W" <bruce.w.allan@intel.com>
Date: Tue, 7 Mar 2017 15:48:23 -0800
Subject: [PATCH 008/410] kbuild: external module build warnings when
 KBUILD_OUTPUT set and W=1

Commit db547ef19064 ("Kbuild: don't add obj tree in additional includes")
causes warnings (-Wmissing-include-dirs) when compiling external modules
with KBUILD_OUTPUT set and W=1.  This is because $src can be an absolute
path to the external module source which when prefixed with -I$(srctree)/
generates an incorrect directory path.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/Makefile.lib | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 0a07f9014944..7234e61e7ce3 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -155,7 +155,7 @@ else
 # $(call addtree,-I$(obj)) locates .h files in srctree, from generated .c files
 #   and locates generated .h files
 # FIXME: Replace both with specific CFLAGS* statements in the makefiles
-__c_flags	= $(if $(obj),-I$(srctree)/$(src) -I$(obj)) \
+__c_flags	= $(if $(obj),$(call addtree,-I$(src)) -I$(obj)) \
 		  $(call flags,_c_flags)
 __a_flags	= $(call flags,_a_flags)
 __cpp_flags     = $(call flags,_cpp_flags)

From f1a880a93baaadb14c10a348fd199f1cdb6bcccd Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Wed, 15 Mar 2017 15:12:23 -0700
Subject: [PATCH 009/410] dm verity fec: limit error correction recursion

If the hash tree itself is sufficiently corrupt in addition to data blocks,
it's possible for error correction to end up in a deep recursive loop,
which eventually causes a kernel panic.  This change limits the
recursion to a reasonable level during a single I/O operation.

Fixes: a739ff3f543a ("dm verity: add support for forward error correction")
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org # v4.5+
---
 drivers/md/dm-verity-fec.c | 12 +++++++++++-
 drivers/md/dm-verity-fec.h |  4 ++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 0f0eb8a3d922..c3cc04d89524 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -439,6 +439,13 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
 	if (!verity_fec_is_enabled(v))
 		return -EOPNOTSUPP;
 
+	if (fio->level >= DM_VERITY_FEC_MAX_RECURSION) {
+		DMWARN_LIMIT("%s: FEC: recursion too deep", v->data_dev->name);
+		return -EIO;
+	}
+
+	fio->level++;
+
 	if (type == DM_VERITY_BLOCK_TYPE_METADATA)
 		block += v->data_blocks;
 
@@ -470,7 +477,7 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
 	if (r < 0) {
 		r = fec_decode_rsb(v, io, fio, rsb, offset, true);
 		if (r < 0)
-			return r;
+			goto done;
 	}
 
 	if (dest)
@@ -480,6 +487,8 @@ int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io,
 		r = verity_for_bv_block(v, io, iter, fec_bv_copy);
 	}
 
+done:
+	fio->level--;
 	return r;
 }
 
@@ -520,6 +529,7 @@ void verity_fec_init_io(struct dm_verity_io *io)
 	memset(fio->bufs, 0, sizeof(fio->bufs));
 	fio->nbufs = 0;
 	fio->output = NULL;
+	fio->level = 0;
 }
 
 /*
diff --git a/drivers/md/dm-verity-fec.h b/drivers/md/dm-verity-fec.h
index 7fa0298b995e..bb31ce87a933 100644
--- a/drivers/md/dm-verity-fec.h
+++ b/drivers/md/dm-verity-fec.h
@@ -27,6 +27,9 @@
 #define DM_VERITY_FEC_BUF_MAX \
 	(1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS))
 
+/* maximum recursion level for verity_fec_decode */
+#define DM_VERITY_FEC_MAX_RECURSION	4
+
 #define DM_VERITY_OPT_FEC_DEV		"use_fec_from_device"
 #define DM_VERITY_OPT_FEC_BLOCKS	"fec_blocks"
 #define DM_VERITY_OPT_FEC_START		"fec_start"
@@ -58,6 +61,7 @@ struct dm_verity_fec_io {
 	unsigned nbufs;		/* number of buffers allocated */
 	u8 *output;		/* buffer for corrected output */
 	size_t output_pos;
+	unsigned level;		/* recursion level */
 };
 
 #ifdef CONFIG_DM_VERITY_FEC

From b334e19ae9381f12a7521976883022385d2b7eef Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 13 Jan 2017 16:40:01 +0100
Subject: [PATCH 010/410] Kbuild: use cc-disable-warning consistently for
 maybe-uninitialized

In commit a76bcf557ef4 ("Kbuild: enable -Wmaybe-uninitialized warning
for "make W=1""), I reverted another change that happened to fix a problem
with old compilers, and now we get this report again with old compilers
(prior to gcc-4.8) and GCOV enabled:

   cc1: warnings being treated as errors
   drivers/gpu/drm/i915/intel_ringbuffer.c: In function 'intel_ring_setup_status_page':
   drivers/gpu/drm/i915/intel_ringbuffer.c:438: error: 'mmio.reg' may be used uninitialized in this function
   At top level:
>> cc1: error: unrecognized command line option "-Wno-maybe-uninitialized"

The problem is that we turn off the warning conditionally in a number
of places as we should, but one of them does it unconditionally.
Instead, change it to call cc-disable-warning as we do elsewhere.

The original patch that caused it was merged into linux-4.7, then
4.8 removed the change and 4.9 brought it back, so we probably want
a backport to 4.9 once this is merged.

Use a ':=' assignment instead of '=' to force the cc-disable-warning
call to only be evaluated once instead of every time.

Cc: stable@vger.kernel.org
Fixes: a76bcf557ef4 ("Kbuild: enable -Wmaybe-uninitialized warning for "make W=1"")
Fixes: e72e2dfe7c16 ("gcov: disable -Wmaybe-uninitialized warning")
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 165cf9783a5d..b8a6b862ed73 100644
--- a/Makefile
+++ b/Makefile
@@ -372,7 +372,7 @@ LDFLAGS_MODULE  =
 CFLAGS_KERNEL	=
 AFLAGS_KERNEL	=
 LDFLAGS_vmlinux =
-CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage -fno-tree-loop-im -Wno-maybe-uninitialized
+CFLAGS_GCOV	:= -fprofile-arcs -ftest-coverage -fno-tree-loop-im $(call cc-disable-warning,maybe-uninitialized,)
 CFLAGS_KCOV	:= $(call cc-option,-fsanitize-coverage=trace-pc,)
 
 

From 77f88796cee819b9c4562b0b6b44691b3b7755b1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 16 Mar 2017 16:54:24 -0400
Subject: [PATCH 011/410] cgroup, kthread: close race window where new kthreads
 can be migrated to non-root cgroups

Creation of a kthread goes through a couple interlocked stages between
the kthread itself and its creator.  Once the new kthread starts
running, it initializes itself and wakes up the creator.  The creator
then can further configure the kthread and then let it start doing its
job by waking it up.

In this configuration-by-creator stage, the creator is the only one
that can wake it up but the kthread is visible to userland.  When
altering the kthread's attributes from userland is allowed, this is
fine; however, for cases where CPU affinity is critical,
kthread_bind() is used to first disable affinity changes from userland
and then set the affinity.  This also prevents the kthread from being
migrated into non-root cgroups as that can affect the CPU affinity and
many other things.

Unfortunately, the cgroup side of protection is racy.  While the
PF_NO_SETAFFINITY flag prevents further migrations, userland can win
the race before the creator sets the flag with kthread_bind() and put
the kthread in a non-root cgroup, which can lead to all sorts of
problems including incorrect CPU affinity and starvation.

This bug got triggered by userland which periodically tries to migrate
all processes in the root cpuset cgroup to a non-root one.  Per-cpu
workqueue workers got caught while being created and ended up with
incorrected CPU affinity breaking concurrency management and sometimes
stalling workqueue execution.

This patch adds task->no_cgroup_migration which disallows the task to
be migrated by userland.  kthreadd starts with the flag set making
every child kthread start in the root cgroup with migration
disallowed.  The flag is cleared after the kthread finishes
initialization by which time PF_NO_SETAFFINITY is set if the kthread
should stay in the root cgroup.

It'd be better to wait for the initialization instead of failing but I
couldn't think of a way of implementing that without adding either a
new PF flag, or sleeping and retrying from waiting side.  Even if
userland depends on changing cgroup membership of a kthread, it either
has to be synchronized with kthread_create() or periodically repeat,
so it's unlikely that this would break anything.

v2: Switch to a simpler implementation using a new task_struct bit
    field suggested by Oleg.

Signed-off-by: Tejun Heo <tj@kernel.org>
Suggested-by: Oleg Nesterov <oleg@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Reported-and-debugged-by: Chris Mason <clm@fb.com>
Cc: stable@vger.kernel.org # v4.3+ (we can't close the race on < v4.3)
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h | 21 +++++++++++++++++++++
 include/linux/sched.h  |  4 ++++
 kernel/cgroup/cgroup.c |  9 +++++----
 kernel/kthread.c       |  3 +++
 4 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f6b43fbb141c..af9c86e958bd 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -570,6 +570,25 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
 	pr_cont_kernfs_path(cgrp->kn);
 }
 
+static inline void cgroup_init_kthreadd(void)
+{
+	/*
+	 * kthreadd is inherited by all kthreads, keep it in the root so
+	 * that the new kthreads are guaranteed to stay in the root until
+	 * initialization is finished.
+	 */
+	current->no_cgroup_migration = 1;
+}
+
+static inline void cgroup_kthread_ready(void)
+{
+	/*
+	 * This kthread finished initialization.  The creator should have
+	 * set PF_NO_SETAFFINITY if this kthread should stay in the root.
+	 */
+	current->no_cgroup_migration = 0;
+}
+
 #else /* !CONFIG_CGROUPS */
 
 struct cgroup_subsys_state;
@@ -590,6 +609,8 @@ static inline void cgroup_free(struct task_struct *p) {}
 
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
+static inline void cgroup_init_kthreadd(void) {}
+static inline void cgroup_kthread_ready(void) {}
 
 static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
 					       struct cgroup *ancestor)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d67eee84fd43..4cf9a59a4d08 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -604,6 +604,10 @@ struct task_struct {
 #ifdef CONFIG_COMPAT_BRK
 	unsigned			brk_randomized:1;
 #endif
+#ifdef CONFIG_CGROUPS
+	/* disallow userland-initiated cgroup migration */
+	unsigned			no_cgroup_migration:1;
+#endif
 
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 0125589c7428..638ef7568495 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2425,11 +2425,12 @@ ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
 		tsk = tsk->group_leader;
 
 	/*
-	 * Workqueue threads may acquire PF_NO_SETAFFINITY and become
-	 * trapped in a cpuset, or RT worker may be born in a cgroup
-	 * with no rt_runtime allocated.  Just say no.
+	 * kthreads may acquire PF_NO_SETAFFINITY during initialization.
+	 * If userland migrates such a kthread to a non-root cgroup, it can
+	 * become trapped in a cpuset, or RT kthread may be born in a
+	 * cgroup with no rt_runtime allocated.  Just say no.
 	 */
-	if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
+	if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
 		ret = -EINVAL;
 		goto out_unlock_rcu;
 	}
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 2f26adea0f84..26db528c1d88 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -20,6 +20,7 @@
 #include <linux/freezer.h>
 #include <linux/ptrace.h>
 #include <linux/uaccess.h>
+#include <linux/cgroup.h>
 #include <trace/events/sched.h>
 
 static DEFINE_SPINLOCK(kthread_create_lock);
@@ -225,6 +226,7 @@ static int kthread(void *_create)
 
 	ret = -EINTR;
 	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
+		cgroup_kthread_ready();
 		__kthread_parkme(self);
 		ret = threadfn(data);
 	}
@@ -538,6 +540,7 @@ int kthreadd(void *unused)
 	set_mems_allowed(node_states[N_MEMORY]);
 
 	current->flags |= PF_NOFREEZE;
+	cgroup_init_kthreadd();
 
 	for (;;) {
 		set_current_state(TASK_INTERRUPTIBLE);

From 90f6e150e44a0dc3883110eeb3ab35d1be42b6bb Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 16 Mar 2017 18:20:49 +0000
Subject: [PATCH 012/410] arm/arm64: KVM: Take mmap_sem in stage2_unmap_vm

We don't hold the mmap_sem while searching for the VMAs when
we try to unmap each memslot for a VM. Fix this properly to
avoid unexpected results.

Fixes: commit 957db105c997 ("arm/arm64: KVM: Introduce stage2_unmap_vm")
Cc: stable@vger.kernel.org # v3.19+
Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/mmu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 962616fd4ddd..f2e2e0c6d6fd 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -803,6 +803,7 @@ void stage2_unmap_vm(struct kvm *kvm)
 	int idx;
 
 	idx = srcu_read_lock(&kvm->srcu);
+	down_read(&current->mm->mmap_sem);
 	spin_lock(&kvm->mmu_lock);
 
 	slots = kvm_memslots(kvm);
@@ -810,6 +811,7 @@ void stage2_unmap_vm(struct kvm *kvm)
 		stage2_unmap_memslot(kvm, memslot);
 
 	spin_unlock(&kvm->mmu_lock);
+	up_read(&current->mm->mmap_sem);
 	srcu_read_unlock(&kvm->srcu, idx);
 }
 

From 72f310481a08db821b614e7b5d00febcc9064b36 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 16 Mar 2017 18:20:50 +0000
Subject: [PATCH 013/410] arm/arm64: KVM: Take mmap_sem in
 kvm_arch_prepare_memory_region

We don't hold the mmap_sem while searching for VMAs (via find_vma), in
kvm_arch_prepare_memory_region, which can end up in expected failures.

Fixes: commit 8eef91239e57 ("arm/arm64: KVM: map MMIO regions at creation time")
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Eric Auger <eric.auger@rehat.com>
Cc: stable@vger.kernel.org # v3.18+
Reviewed-by: Christoffer Dall <cdall@linaro.org>
[ Handle dirty page logging failure case ]
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/mmu.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index f2e2e0c6d6fd..13b9c1fa8961 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1803,6 +1803,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	    (KVM_PHYS_SIZE >> PAGE_SHIFT))
 		return -EFAULT;
 
+	down_read(&current->mm->mmap_sem);
 	/*
 	 * A memory region could potentially cover multiple VMAs, and any holes
 	 * between them, so iterate over all of them to find out if we can map
@@ -1846,8 +1847,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 			pa += vm_start - vma->vm_start;
 
 			/* IO region dirty page logging not allowed */
-			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
-				return -EINVAL;
+			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+				ret = -EINVAL;
+				goto out;
+			}
 
 			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
 						    vm_end - vm_start,
@@ -1859,7 +1862,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	} while (hva < reg_end);
 
 	if (change == KVM_MR_FLAGS_ONLY)
-		return ret;
+		goto out;
 
 	spin_lock(&kvm->mmu_lock);
 	if (ret)
@@ -1867,6 +1870,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	else
 		stage2_flush_memslot(kvm, memslot);
 	spin_unlock(&kvm->mmu_lock);
+out:
+	up_read(&current->mm->mmap_sem);
 	return ret;
 }
 

From 0d963b6e650d9d5533223f3dbcde7dda466df65c Mon Sep 17 00:00:00 2001
From: Joe Thornber <ejt@redhat.com>
Date: Mon, 20 Mar 2017 11:54:11 -0400
Subject: [PATCH 014/410] dm cache metadata: fix metadata2 format's
 blocks_are_clean_separate_dirty

The dm_bitset_cursor_begin() call was using the incorrect nr_entries.
Also, the last dm_bitset_cursor_next() must be avoided if we're at the
end of the cursor.

Fixes: 7f1b21591a6 ("dm cache metadata: use cursor api in blocks_are_clean_separate_dirty()")
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-metadata.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index e4c2c1a1e993..6735c8d6a445 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -932,7 +932,7 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd,
 	*result = true;
 
 	r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
-				   from_cblock(begin), &cmd->dirty_cursor);
+				   from_cblock(cmd->cache_blocks), &cmd->dirty_cursor);
 	if (r) {
 		DMERR("%s: dm_bitset_cursor_begin for dirty failed", __func__);
 		return r;
@@ -959,14 +959,16 @@ static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd,
 			return 0;
 		}
 
+		begin = to_cblock(from_cblock(begin) + 1);
+		if (begin == end)
+			break;
+
 		r = dm_bitset_cursor_next(&cmd->dirty_cursor);
 		if (r) {
 			DMERR("%s: dm_bitset_cursor_next for dirty failed", __func__);
 			dm_bitset_cursor_end(&cmd->dirty_cursor);
 			return r;
 		}
-
-		begin = to_cblock(from_cblock(begin) + 1);
 	}
 
 	dm_bitset_cursor_end(&cmd->dirty_cursor);

From ae5c682113f9f94cc5e76f92cf041ee624c173ee Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sun, 19 Mar 2017 22:35:59 +0800
Subject: [PATCH 015/410] netfilter: nfnl_cthelper: fix incorrect
 helper->expect_class_max

The helper->expect_class_max must be set to the total number of
expect_policy minus 1, since we will use the statement "if (class >
helper->expect_class_max)" to validate the CTA_EXPECT_CLASS attr in
ctnetlink_alloc_expect.

So for compatibility, set the helper->expect_class_max to the
NFCTH_POLICY_SET_NUM attr's value minus 1.

Also: it's invalid when the NFCTH_POLICY_SET_NUM attr's value is zero.
1. this will result "expect_policy = kzalloc(0, GFP_KERNEL);";
2. we cannot set the helper->expect_class_max to a proper value.

So if nla_get_be32(tb[NFCTH_POLICY_SET_NUM]) is zero, report -EINVAL to
the userspace.

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_cthelper.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index de8782345c86..3cd41d105407 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -161,6 +161,7 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
 	int i, ret;
 	struct nf_conntrack_expect_policy *expect_policy;
 	struct nlattr *tb[NFCTH_POLICY_SET_MAX+1];
+	unsigned int class_max;
 
 	ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
 			       nfnl_cthelper_expect_policy_set);
@@ -170,19 +171,18 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
 	if (!tb[NFCTH_POLICY_SET_NUM])
 		return -EINVAL;
 
-	helper->expect_class_max =
-		ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM]));
-
-	if (helper->expect_class_max != 0 &&
-	    helper->expect_class_max > NF_CT_MAX_EXPECT_CLASSES)
+	class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM]));
+	if (class_max == 0)
+		return -EINVAL;
+	if (class_max > NF_CT_MAX_EXPECT_CLASSES)
 		return -EOVERFLOW;
 
 	expect_policy = kzalloc(sizeof(struct nf_conntrack_expect_policy) *
-				helper->expect_class_max, GFP_KERNEL);
+				class_max, GFP_KERNEL);
 	if (expect_policy == NULL)
 		return -ENOMEM;
 
-	for (i=0; i<helper->expect_class_max; i++) {
+	for (i = 0; i < class_max; i++) {
 		if (!tb[NFCTH_POLICY_SET+i])
 			goto err;
 
@@ -191,6 +191,8 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
 		if (ret < 0)
 			goto err;
 	}
+
+	helper->expect_class_max = class_max - 1;
 	helper->expect_policy = expect_policy;
 	return 0;
 err:
@@ -377,10 +379,10 @@ nfnl_cthelper_dump_policy(struct sk_buff *skb,
 		goto nla_put_failure;
 
 	if (nla_put_be32(skb, NFCTH_POLICY_SET_NUM,
-			 htonl(helper->expect_class_max)))
+			 htonl(helper->expect_class_max + 1)))
 		goto nla_put_failure;
 
-	for (i=0; i<helper->expect_class_max; i++) {
+	for (i = 0; i < helper->expect_class_max + 1; i++) {
 		nest_parms2 = nla_nest_start(skb,
 				(NFCTH_POLICY_SET+i) | NLA_F_NESTED);
 		if (nest_parms2 == NULL)

From 28787bf47b11b08290918dcf91b08764cb5fe122 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz@free-electrons.com>
Date: Mon, 20 Mar 2017 12:25:51 +0100
Subject: [PATCH 016/410] ARM: sun8i: a33: remove highest OPP to fix CPU
 crashes

The highest supported frequency (1.2GHz) requires to "overvolt" the CPU.
However, some boards still do not have the cpu-supply DT property in the
cpu DT node which means that the CPU will always run with the same input
voltage but try to run at 1.2GHz frequency. This is the source of
(experienced) CPU crashes.

Remove the OPP which requires overvolting the CPU until all boards have
a cpu-supply property.

Fixes: 03749eb88e63 ("ARM: dts: sun8i: add opp-v2 table for A33")
Signed-off-by: Quentin Schulz <quentin.schulz@free-electrons.com>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 arch/arm/boot/dts/sun8i-a33.dtsi | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/arm/boot/dts/sun8i-a33.dtsi b/arch/arm/boot/dts/sun8i-a33.dtsi
index 18c174fef84f..045d488cc7ed 100644
--- a/arch/arm/boot/dts/sun8i-a33.dtsi
+++ b/arch/arm/boot/dts/sun8i-a33.dtsi
@@ -66,12 +66,6 @@
 			opp-microvolt = <1200000>;
 			clock-latency-ns = <244144>; /* 8 32k periods */
 		};
-
-		opp@1200000000 {
-			opp-hz = /bits/ 64 <1200000000>;
-			opp-microvolt = <1320000>;
-			clock-latency-ns = <244144>; /* 8 32k periods */
-		};
 	};
 
 	cpus {

From f3fbd7ec62dec1528fb8044034e2885f2b257941 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Tue, 14 Feb 2017 00:03:38 +0900
Subject: [PATCH 017/410] arm: kprobes: Allow to handle reentered kprobe on
 single-stepping

This is arm port of commit 6a5022a56ac3 ("kprobes/x86: Allow to
handle reentered kprobe on single-stepping")

Since the FIQ handlers can interrupt in the single stepping
(or preparing the single stepping, do_debug etc.), we should
consider a kprobe is hit in the NMI handler. Even in that
case, the kprobe is allowed to be reentered as same as the
kprobes hit in kprobe handlers
(KPROBE_HIT_ACTIVE or KPROBE_HIT_SSDONE).

The real issue will happen when a kprobe hit while another
reentered kprobe is processing (KPROBE_REENTER), because
we already consumed a saved-area for the previous kprobe.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Jon Medhurst <tixy@linaro.org>
---
 arch/arm/probes/kprobes/core.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index b6dc9d838a9a..35148b46c4f5 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -271,6 +271,7 @@ void __kprobes kprobe_handler(struct pt_regs *regs)
 			switch (kcb->kprobe_status) {
 			case KPROBE_HIT_ACTIVE:
 			case KPROBE_HIT_SSDONE:
+			case KPROBE_HIT_SS:
 				/* A pre- or post-handler probe got us here. */
 				kprobes_inc_nmissed_count(p);
 				save_previous_kprobe(kcb);
@@ -279,6 +280,11 @@ void __kprobes kprobe_handler(struct pt_regs *regs)
 				singlestep(p, regs, kcb);
 				restore_previous_kprobe(kcb);
 				break;
+			case KPROBE_REENTER:
+				/* A nested probe was hit in FIQ, it is a BUG */
+				pr_warn("Unrecoverable kprobe detected at %p.\n",
+					p->addr);
+				/* fall through */
 			default:
 				/* impossible cases */
 				BUG();

From 91fc862c613ab7a0ef6b0b7755c33619127f4e5a Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Tue, 14 Feb 2017 00:04:48 +0900
Subject: [PATCH 018/410] arm: kprobes: Skip single-stepping in recursing path
 if possible

Kprobes/arm skips single-stepping (moreover handling the event)
if the conditional instruction must not be executed. This
also apply the rule when we hit the recursing kprobe, so
that kprobe does not count nmissed up in that case.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Jon Medhurst <tixy@linaro.org>
---
 arch/arm/probes/kprobes/core.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index 35148b46c4f5..269f66e66ff5 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -266,7 +266,15 @@ void __kprobes kprobe_handler(struct pt_regs *regs)
 #endif
 
 	if (p) {
-		if (cur) {
+		if (!p->ainsn.insn_check_cc(regs->ARM_cpsr)) {
+			/*
+			 * Probe hit but conditional execution check failed,
+			 * so just skip the instruction and continue as if
+			 * nothing had happened.
+			 * In this case, we can skip recursing check too.
+			 */
+			singlestep_skip(p, regs);
+		} else if (cur) {
 			/* Kprobe is pending, so we're recursing. */
 			switch (kcb->kprobe_status) {
 			case KPROBE_HIT_ACTIVE:
@@ -289,7 +297,7 @@ void __kprobes kprobe_handler(struct pt_regs *regs)
 				/* impossible cases */
 				BUG();
 			}
-		} else if (p->ainsn.insn_check_cc(regs->ARM_cpsr)) {
+		} else {
 			/* Probe hit and conditional execution check ok. */
 			set_current_kprobe(p);
 			kcb->kprobe_status = KPROBE_HIT_ACTIVE;
@@ -310,13 +318,6 @@ void __kprobes kprobe_handler(struct pt_regs *regs)
 				}
 				reset_current_kprobe();
 			}
-		} else {
-			/*
-			 * Probe hit but conditional execution check failed,
-			 * so just skip the instruction and continue as if
-			 * nothing had happened.
-			 */
-			singlestep_skip(p, regs);
 		}
 	} else if (cur) {
 		/* We probably hit a jprobe.  Call its break handler. */

From 06553175f585b52509c7df37d6f4a50aacb7b211 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Tue, 14 Feb 2017 00:05:59 +0900
Subject: [PATCH 019/410] arm: kprobes: Fix the return address of multiple
 kretprobes

This is arm port of commit 737480a0d525 ("kprobes/x86:
Fix the return address of multiple kretprobes").

Fix the return address of subsequent kretprobes when multiple
kretprobes are set on the same function.

For example:

  # cd /sys/kernel/debug/tracing
  # echo "r:event1 sys_symlink" > kprobe_events
  # echo "r:event2 sys_symlink" >> kprobe_events
  # echo 1 > events/kprobes/enable
  # ln -s /tmp/foo /tmp/bar

 (without this patch)

  # cat trace | grep -v ^#
              ln-82    [000] dn.2    68.446525: event1: (kretprobe_trampoline+0x0/0x18 <- SyS_symlink)
              ln-82    [000] dn.2    68.447831: event2: (ret_fast_syscall+0x0/0x1c <- SyS_symlink)

 (with this patch)

  # cat trace | grep -v ^#
              ln-81    [000] dn.1    39.463469: event1: (ret_fast_syscall+0x0/0x1c <- SyS_symlink)
              ln-81    [000] dn.1    39.464701: event2: (ret_fast_syscall+0x0/0x1c <- SyS_symlink)

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: KUMANO Syuhei <kumano.prog@gmail.com>
Signed-off-by: Jon Medhurst <tixy@linaro.org>
---
 arch/arm/probes/kprobes/core.c | 36 ++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index 269f66e66ff5..ad1f4e6a9e33 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -441,6 +441,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	struct hlist_node *tmp;
 	unsigned long flags, orig_ret_address = 0;
 	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+	kprobe_opcode_t *correct_ret_addr = NULL;
 
 	INIT_HLIST_HEAD(&empty_rp);
 	kretprobe_hash_lock(current, &head, &flags);
@@ -463,15 +464,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 			/* another task is sharing our hash bucket */
 			continue;
 
-		if (ri->rp && ri->rp->handler) {
-			__this_cpu_write(current_kprobe, &ri->rp->kp);
-			get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
-			ri->rp->handler(ri, regs);
-			__this_cpu_write(current_kprobe, NULL);
-		}
-
 		orig_ret_address = (unsigned long)ri->ret_addr;
-		recycle_rp_inst(ri, &empty_rp);
 
 		if (orig_ret_address != trampoline_address)
 			/*
@@ -483,6 +476,33 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	}
 
 	kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+	correct_ret_addr = ri->ret_addr;
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		if (ri->rp && ri->rp->handler) {
+			__this_cpu_write(current_kprobe, &ri->rp->kp);
+			get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
+			ri->ret_addr = correct_ret_addr;
+			ri->rp->handler(ri, regs);
+			__this_cpu_write(current_kprobe, NULL);
+		}
+
+		recycle_rp_inst(ri, &empty_rp);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
 	kretprobe_hash_unlock(current, &flags);
 
 	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {

From 974310d047f3c7788a51d10c8d255eebdb1fa857 Mon Sep 17 00:00:00 2001
From: Jon Medhurst <tixy@linaro.org>
Date: Thu, 2 Mar 2017 13:04:09 +0000
Subject: [PATCH 020/410] arm: kprobes: Align stack to 8-bytes in test code

kprobes test cases need to have a stack that is aligned to an 8-byte
boundary because they call other functions (and the ARM ABI mandates
that alignment) and because test cases include 64-bit accesses to the
stack. Unfortunately, GCC doesn't ensure this alignment for inline
assembler and for the code in question seems to always misalign it by
pushing just the LR register onto the stack. We therefore need to
explicitly perform stack alignment at the start of each test case.

Without this fix, some test cases will generate alignment faults on
systems where alignment is enforced. Even if the kernel is configured to
handle these faults in software, triggering them is ugly. It also
exposes limitations in the fault handling code which doesn't cope with
writes to the stack. E.g. when handling this instruction

   strd r6, [sp, #-64]!

the fault handling code will write to a stack location below the SP
value at the point the fault occurred, which coincides with where the
exception handler has pushed the saved register context. This results in
corruption of those registers.

Signed-off-by: Jon Medhurst <tixy@linaro.org>
---
 arch/arm/probes/kprobes/test-core.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c
index c893726aa52d..1c98a87786ca 100644
--- a/arch/arm/probes/kprobes/test-core.c
+++ b/arch/arm/probes/kprobes/test-core.c
@@ -977,7 +977,10 @@ static void coverage_end(void)
 void __naked __kprobes_test_case_start(void)
 {
 	__asm__ __volatile__ (
-		"stmdb	sp!, {r4-r11}				\n\t"
+		"mov	r2, sp					\n\t"
+		"bic	r3, r2, #7				\n\t"
+		"mov	sp, r3					\n\t"
+		"stmdb	sp!, {r2-r11}				\n\t"
 		"sub	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
 		"bic	r0, lr, #1  @ r0 = inline data		\n\t"
 		"mov	r1, sp					\n\t"
@@ -997,7 +1000,8 @@ void __naked __kprobes_test_case_end_32(void)
 		"movne	pc, r0					\n\t"
 		"mov	r0, r4					\n\t"
 		"add	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
-		"ldmia	sp!, {r4-r11}				\n\t"
+		"ldmia	sp!, {r2-r11}				\n\t"
+		"mov	sp, r2					\n\t"
 		"mov	pc, r0					\n\t"
 	);
 }
@@ -1013,7 +1017,8 @@ void __naked __kprobes_test_case_end_16(void)
 		"bxne	r0					\n\t"
 		"mov	r0, r4					\n\t"
 		"add	sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t"
-		"ldmia	sp!, {r4-r11}				\n\t"
+		"ldmia	sp!, {r2-r11}				\n\t"
+		"mov	sp, r2					\n\t"
 		"bx	r0					\n\t"
 	);
 }

From 7292ae3d5a18fb922be496e6bb687647193569b4 Mon Sep 17 00:00:00 2001
From: Gleb Fotengauer-Malinovskiy <glebfm@altlinux.org>
Date: Mon, 20 Mar 2017 20:15:53 +0300
Subject: [PATCH 021/410] jump label: fix passing kbuild_cflags when checking
 for asm goto support

The latest change of asm goto support check added passing of KBUILD_CFLAGS
to compiler.  When these flags reference gcc plugins that are not built yet,
the check fails.

When one runs "make bzImage" followed by "make modules", the kernel is always
built with HAVE_JUMP_LABEL disabled, while the modules are built depending on
CONFIG_JUMP_LABEL.  If HAVE_JUMP_LABEL macro happens to be different, modules
are built with undefined references, e.g.:

ERROR: "static_key_slow_inc" [net/netfilter/xt_TEE.ko] undefined!
ERROR: "static_key_slow_dec" [net/netfilter/xt_TEE.ko] undefined!
ERROR: "static_key_slow_dec" [net/netfilter/nft_meta.ko] undefined!
ERROR: "static_key_slow_inc" [net/netfilter/nft_meta.ko] undefined!
ERROR: "nf_hooks_needed" [net/netfilter/ipvs/ip_vs.ko] undefined!
ERROR: "nf_hooks_needed" [net/ipv6/ipv6.ko] undefined!
ERROR: "static_key_count" [net/ipv6/ipv6.ko] undefined!
ERROR: "static_key_slow_inc" [net/ipv6/ipv6.ko] undefined!

This change moves the check before all these references are added
to KBUILD_CFLAGS.  This is correct because subsequent KBUILD_CFLAGS
modifications are not relevant to this check.

Reported-by: Anton V. Boyarshinov <boyarsh@altlinux.org>
Fixes: 35f860f9ba6a ("jump label: pass kbuild_cflags when checking for asm goto support")
Cc: stable@vger.kernel.org	# v4.10
Signed-off-by: Gleb Fotengauer-Malinovskiy <glebfm@altlinux.org>
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: David Lin <dtwlin@google.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/Makefile b/Makefile
index b8a6b862ed73..74d69f241543 100644
--- a/Makefile
+++ b/Makefile
@@ -653,6 +653,12 @@ KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0409, \
 # Tell gcc to never replace conditional load with a non-conditional one
 KBUILD_CFLAGS	+= $(call cc-option,--param=allow-store-data-races=0)
 
+# check for 'asm goto'
+ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y)
+	KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
+	KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
+endif
+
 include scripts/Makefile.gcc-plugins
 
 ifdef CONFIG_READABLE_ASM
@@ -798,12 +804,6 @@ KBUILD_CFLAGS   += $(call cc-option,-Werror=incompatible-pointer-types)
 # use the deterministic mode of AR if available
 KBUILD_ARFLAGS := $(call ar-option,D)
 
-# check for 'asm goto'
-ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y)
-	KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
-	KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
-endif
-
 include scripts/Makefile.kasan
 include scripts/Makefile.extrawarn
 include scripts/Makefile.ubsan

From 9be3213b14d44f6328281941193d97f3b97e7d4c Mon Sep 17 00:00:00 2001
From: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Date: Mon, 13 Mar 2017 20:33:41 +0100
Subject: [PATCH 022/410] gconfig: remove misleading parentheses around a
 condition

When building the kernel with clang, the compiler complains about the
presence of a condition inside two pairs of parentheses:

    scripts/kconfig/gconf.c:917:19: error: equality comparison with
    extraneous parentheses [-Werror,-Wparentheses-equality]
                    } else if ((col == COL_OPTION)) {
                                ~~~~^~~~~~~~~~~~~
    scripts/kconfig/gconf.c:917:19: note: remove extraneous parentheses
    around the comparison to silence this warning
                    } else if ((col == COL_OPTION)) {
                               ~    ^            ~
    scripts/kconfig/gconf.c:917:19: note: use '=' to turn this equality
    comparison into an assignment
                    } else if ((col == COL_OPTION)) {
                                    ^~
                                    =

Silence this warning by removing a level of parentheses.

Signed-off-by: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/kconfig/gconf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c
index 26d208b435a0..cfddddb9c9d7 100644
--- a/scripts/kconfig/gconf.c
+++ b/scripts/kconfig/gconf.c
@@ -914,7 +914,7 @@ on_treeview2_button_press_event(GtkWidget * widget,
 			current = menu;
 			display_tree_part();
 			gtk_widget_set_sensitive(back_btn, TRUE);
-		} else if ((col == COL_OPTION)) {
+		} else if (col == COL_OPTION) {
 			toggle_sym_value(menu);
 			gtk_tree_view_expand_row(view, path, TRUE);
 		}

From fb2155e3c30dc2043b52020e26965067a3e7779c Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@imgtec.com>
Date: Tue, 21 Mar 2017 14:39:19 +0000
Subject: [PATCH 023/410] MIPS: smp-cps: Fix retrieval of VPE mask on big
 endian CPUs

The vpe_mask member of struct core_boot_config is of type atomic_t,
which is a 32bit type. In cps-vec.S this member was being retrieved by a
PTR_L macro, which on 64bit systems is a 64bit load. On little endian
systems this is OK, since the double word that is retrieved will have
the required less significant word in the correct position. However, on
big endian systems the less significant word of the load is retrieved
from address+4, and the more significant from address+0. The destination
register therefore ends up with the required word in the more
significant word
e.g. when starting the second VP of a big endian 64bit system, the load

PTR_L    ta2, COREBOOTCFG_VPEMASK(a0)

ends up setting register ta2 to 0x0000000300000000

When this value is written to the CPC it is ignored, since it is
invalid to write anything larger than 4 bits. This results in any VP
other than VP0 in a core failing to start in 64bit big endian systems.

Change the load to a 32bit load word instruction to fix the bug.

Fixes: f12401d7219f ("MIPS: smp-cps: Pull boot config retrieval out of mips_cps_boot_vpes")
Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15787/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/cps-vec.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S
index 59476a607add..a00e87b0256d 100644
--- a/arch/mips/kernel/cps-vec.S
+++ b/arch/mips/kernel/cps-vec.S
@@ -361,7 +361,7 @@ LEAF(mips_cps_get_bootcfg)
 	END(mips_cps_get_bootcfg)
 
 LEAF(mips_cps_boot_vpes)
-	PTR_L	ta2, COREBOOTCFG_VPEMASK(a0)
+	lw	ta2, COREBOOTCFG_VPEMASK(a0)
 	PTR_L	ta3, COREBOOTCFG_VPECONFIG(a0)
 
 #if defined(CONFIG_CPU_MIPSR6)

From 6ef90877eee63a0d03e83183bb44b64229b624e6 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Wed, 15 Mar 2017 23:26:42 +0100
Subject: [PATCH 024/410] MIPS: Lantiq: fix missing xbar kernel panic

Commit 08b3c894e565 ("MIPS: lantiq: Disable xbar fpi burst mode")
accidentally requested the resources from the pmu address region
instead of the xbar registers region, but the check for the return
value of request_mem_region() was wrong. Commit 98ea51cb0c8c ("MIPS:
Lantiq: Fix another request_mem_region() return code check") fixed the
check of the return value of request_mem_region() which made the kernel
panics.
This patch now makes use of the correct memory region for the cross bar.

Fixes: 08b3c894e565 ("MIPS: lantiq: Disable xbar fpi burst mode")
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Cc: John Crispin <john@phrozen.org>
Cc: james.hogan@imgtec.com
Cc: arnd@arndb.de
Cc: sergei.shtylyov@cogentembedded.com
Cc: john@phrozen.org
Cc: <stable@vger.kernel.org> # 4.4.x-
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/15751
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/lantiq/xway/sysctrl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c
index 3c3aa05891dd..95bec460b651 100644
--- a/arch/mips/lantiq/xway/sysctrl.c
+++ b/arch/mips/lantiq/xway/sysctrl.c
@@ -467,7 +467,7 @@ void __init ltq_soc_init(void)
 
 		if (!np_xbar)
 			panic("Failed to load xbar nodes from devicetree");
-		if (of_address_to_resource(np_pmu, 0, &res_xbar))
+		if (of_address_to_resource(np_xbar, 0, &res_xbar))
 			panic("Failed to get xbar resources");
 		if (!request_mem_region(res_xbar.start, resource_size(&res_xbar),
 			res_xbar.name))

From 033cffeedbd11c140952b98e8639bf652091a17d Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Thu, 16 Mar 2017 21:00:25 +0800
Subject: [PATCH 025/410] MIPS: Add MIPS_CPU_FTLB for Loongson-3A R2

Loongson-3A R2 and newer CPU have FTLB, but Config0.MT is 1, so add
MIPS_CPU_FTLB to the CPU options.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Cc: John Crispin <john@phrozen.org>
Cc: Steven J . Hill <Steven.Hill@caviumnetworks.com>
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15752/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/cpu-probe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 07718bb5fc9d..12422fd4af23 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -1824,7 +1824,7 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
 		}
 
 		decode_configs(c);
-		c->options |= MIPS_CPU_TLBINV | MIPS_CPU_LDPTE;
+		c->options |= MIPS_CPU_FTLB | MIPS_CPU_TLBINV | MIPS_CPU_LDPTE;
 		c->writecombine = _CACHE_UNCACHED_ACCELERATED;
 		break;
 	default:

From 5a34133167dce36666ea054e30a561b7f4413b7f Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Thu, 16 Mar 2017 21:00:26 +0800
Subject: [PATCH 026/410] MIPS: Check TLB before handle_ri_rdhwr() for
 Loongson-3

Loongson-3's micro TLB (ITLB) is not strictly a subset of JTLB. That
means: when a JTLB entry is replaced by hardware, there may be an old
valid entry exists in ITLB. So, a TLB miss exception may occur while
handle_ri_rdhwr() is running because it try to access EPC's content.
However, handle_ri_rdhwr() doesn't clear EXL, which makes a TLB Refill
exception be treated as a TLB Invalid exception and tlbp may fail. In
this case, if FTLB (which is usually set-associative instead of set-
associative) is enabled, a tlbp failure will cause an invalid tlbwi,
which will hang the whole system.

This patch rename handle_ri_rdhwr_vivt to handle_ri_rdhwr_tlbp and use
it for Loongson-3. It try to solve the same problem described as below,
but more straightforwards.

https://patchwork.linux-mips.org/patch/12591/

I think Loongson-2 has the same problem, but it has no FTLB, so we just
keep it as is.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Cc: Rui Wang <wangr@lemote.com>
Cc: John Crispin <john@phrozen.org>
Cc: Steven J . Hill <Steven.Hill@caviumnetworks.com>
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: Huacai Chen <chenhc@lemote.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15753/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/kernel/genex.S |  4 ++--
 arch/mips/kernel/traps.c | 17 +++++++++++++----
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index 7ec9612cb007..2ac6c2625c13 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -519,7 +519,7 @@ NESTED(nmi_handler, PT_SIZE, sp)
 	BUILD_HANDLER reserved reserved sti verbose	/* others */
 
 	.align	5
-	LEAF(handle_ri_rdhwr_vivt)
+	LEAF(handle_ri_rdhwr_tlbp)
 	.set	push
 	.set	noat
 	.set	noreorder
@@ -538,7 +538,7 @@ NESTED(nmi_handler, PT_SIZE, sp)
 	.set	pop
 	bltz	k1, handle_ri	/* slow path */
 	/* fall thru */
-	END(handle_ri_rdhwr_vivt)
+	END(handle_ri_rdhwr_tlbp)
 
 	LEAF(handle_ri_rdhwr)
 	.set	push
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index c7d17cfb32f6..b49e7bf9f950 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -83,7 +83,7 @@ extern asmlinkage void handle_dbe(void);
 extern asmlinkage void handle_sys(void);
 extern asmlinkage void handle_bp(void);
 extern asmlinkage void handle_ri(void);
-extern asmlinkage void handle_ri_rdhwr_vivt(void);
+extern asmlinkage void handle_ri_rdhwr_tlbp(void);
 extern asmlinkage void handle_ri_rdhwr(void);
 extern asmlinkage void handle_cpu(void);
 extern asmlinkage void handle_ov(void);
@@ -2408,9 +2408,18 @@ void __init trap_init(void)
 
 	set_except_vector(EXCCODE_SYS, handle_sys);
 	set_except_vector(EXCCODE_BP, handle_bp);
-	set_except_vector(EXCCODE_RI, rdhwr_noopt ? handle_ri :
-			  (cpu_has_vtag_icache ?
-			   handle_ri_rdhwr_vivt : handle_ri_rdhwr));
+
+	if (rdhwr_noopt)
+		set_except_vector(EXCCODE_RI, handle_ri);
+	else {
+		if (cpu_has_vtag_icache)
+			set_except_vector(EXCCODE_RI, handle_ri_rdhwr_tlbp);
+		else if (current_cpu_type() == CPU_LOONGSON3)
+			set_except_vector(EXCCODE_RI, handle_ri_rdhwr_tlbp);
+		else
+			set_except_vector(EXCCODE_RI, handle_ri_rdhwr);
+	}
+
 	set_except_vector(EXCCODE_CPU, handle_cpu);
 	set_except_vector(EXCCODE_OV, handle_ov);
 	set_except_vector(EXCCODE_TR, handle_tr);

From 0115f6cbf26663c86496bc56eeea293f85b77897 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Thu, 16 Mar 2017 21:00:27 +0800
Subject: [PATCH 027/410] MIPS: Flush wrong invalid FTLB entry for huge page

On VTLB+FTLB platforms (such as Loongson-3A R2), FTLB's pagesize is
usually configured the same as PAGE_SIZE. In such a case, Huge page
entry is not suitable to write in FTLB.

Unfortunately, when a huge page is created, its page table entries
haven't created immediately. Then the TLB refill handler will fetch an
invalid page table entry which has no "HUGE" bit, and this entry may be
written to FTLB. Since it is invalid, TLB load/store handler will then
use tlbwi to write the valid entry at the same place. However, the
valid entry is a huge page entry which isn't suitable for FTLB.

Our solution is to modify build_huge_handler_tail. Flush the invalid
old entry (whether it is in FTLB or VTLB, this is in order to reduce
branches) and use tlbwr to write the valid new entry.

Signed-off-by: Rui Wang <wangr@lemote.com>
Signed-off-by: Huacai Chen <chenhc@lemote.com>
Cc: John Crispin <john@phrozen.org>
Cc: Steven J . Hill <Steven.Hill@caviumnetworks.com>
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: Huacai Chen <chenhc@lemote.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15754/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mm/tlbex.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 9bfee8988eaf..4f642e07c2b1 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -760,7 +760,8 @@ static void build_huge_update_entries(u32 **p, unsigned int pte,
 static void build_huge_handler_tail(u32 **p, struct uasm_reloc **r,
 				    struct uasm_label **l,
 				    unsigned int pte,
-				    unsigned int ptr)
+				    unsigned int ptr,
+				    unsigned int flush)
 {
 #ifdef CONFIG_SMP
 	UASM_i_SC(p, pte, 0, ptr);
@@ -769,6 +770,22 @@ static void build_huge_handler_tail(u32 **p, struct uasm_reloc **r,
 #else
 	UASM_i_SW(p, pte, 0, ptr);
 #endif
+	if (cpu_has_ftlb && flush) {
+		BUG_ON(!cpu_has_tlbinv);
+
+		UASM_i_MFC0(p, ptr, C0_ENTRYHI);
+		uasm_i_ori(p, ptr, ptr, MIPS_ENTRYHI_EHINV);
+		UASM_i_MTC0(p, ptr, C0_ENTRYHI);
+		build_tlb_write_entry(p, l, r, tlb_indexed);
+
+		uasm_i_xori(p, ptr, ptr, MIPS_ENTRYHI_EHINV);
+		UASM_i_MTC0(p, ptr, C0_ENTRYHI);
+		build_huge_update_entries(p, pte, ptr);
+		build_huge_tlb_write_entry(p, l, r, pte, tlb_random, 0);
+
+		return;
+	}
+
 	build_huge_update_entries(p, pte, ptr);
 	build_huge_tlb_write_entry(p, l, r, pte, tlb_indexed, 0);
 }
@@ -2199,7 +2216,7 @@ static void build_r4000_tlb_load_handler(void)
 		uasm_l_tlbl_goaround2(&l, p);
 	}
 	uasm_i_ori(&p, wr.r1, wr.r1, (_PAGE_ACCESSED | _PAGE_VALID));
-	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
+	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 1);
 #endif
 
 	uasm_l_nopage_tlbl(&l, p);
@@ -2254,7 +2271,7 @@ static void build_r4000_tlb_store_handler(void)
 	build_tlb_probe_entry(&p);
 	uasm_i_ori(&p, wr.r1, wr.r1,
 		   _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
-	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
+	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 1);
 #endif
 
 	uasm_l_nopage_tlbs(&l, p);
@@ -2310,7 +2327,7 @@ static void build_r4000_tlb_modify_handler(void)
 	build_tlb_probe_entry(&p);
 	uasm_i_ori(&p, wr.r1, wr.r1,
 		   _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY);
-	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2);
+	build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 0);
 #endif
 
 	uasm_l_nopage_tlbm(&l, p);

From 0be032c190abcdcfa948082b6a1e0d461184ba4d Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Thu, 16 Mar 2017 21:00:29 +0800
Subject: [PATCH 028/410] MIPS: c-r4k: Fix Loongson-3's vcache/scache waysize
 calculation

If scache.waysize is 0, r4k___flush_cache_all() will do nothing and
then cause bugs. BTW, though vcache.waysize isn't being used by now,
we also fix its calculation.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Cc: John Crispin <john@phrozen.org>
Cc: Steven J . Hill <Steven.Hill@caviumnetworks.com>
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15756/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/mm/c-r4k.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index e7f798d55fbc..3fe99cb271a9 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -1562,6 +1562,7 @@ static void probe_vcache(void)
 	vcache_size = c->vcache.sets * c->vcache.ways * c->vcache.linesz;
 
 	c->vcache.waybit = 0;
+	c->vcache.waysize = vcache_size / c->vcache.ways;
 
 	pr_info("Unified victim cache %ldkB %s, linesize %d bytes.\n",
 		vcache_size >> 10, way_string[c->vcache.ways], c->vcache.linesz);
@@ -1664,6 +1665,7 @@ static void __init loongson3_sc_init(void)
 	/* Loongson-3 has 4 cores, 1MB scache for each. scaches are shared */
 	scache_size *= 4;
 	c->scache.waybit = 0;
+	c->scache.waysize = scache_size / c->scache.ways;
 	pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n",
 	       scache_size >> 10, way_string[c->scache.ways], c->scache.linesz);
 	if (scache_size)

From ea33c2c2051a266f68d9cd920c789cec828c8f11 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz@free-electrons.com>
Date: Tue, 21 Mar 2017 16:36:01 +0100
Subject: [PATCH 029/410] ARM: sun8i: a33: add operating-points-v2 property to
 all nodes

The OPP are declared as shared but no operating points are declared for
cpu1, 2 and 3. Thus, the following error happens during the boot:

cpu cpu1: dev_pm_opp_of_get_sharing_cpus: Couldn't find tcpu_dev node.

This patch applies the operating points to each cpu of the A33.

Fixes: 03749eb88e63 ("ARM: dts: sun8i: add opp-v2 table for A33")
Signed-off-by: Quentin Schulz <quentin.schulz@free-electrons.com>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 arch/arm/boot/dts/sun8i-a33.dtsi | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm/boot/dts/sun8i-a33.dtsi b/arch/arm/boot/dts/sun8i-a33.dtsi
index 045d488cc7ed..a367c0ac4e76 100644
--- a/arch/arm/boot/dts/sun8i-a33.dtsi
+++ b/arch/arm/boot/dts/sun8i-a33.dtsi
@@ -75,16 +75,22 @@
 			operating-points-v2 = <&cpu0_opp_table>;
 		};
 
+		cpu@1 {
+			operating-points-v2 = <&cpu0_opp_table>;
+		};
+
 		cpu@2 {
 			compatible = "arm,cortex-a7";
 			device_type = "cpu";
 			reg = <2>;
+			operating-points-v2 = <&cpu0_opp_table>;
 		};
 
 		cpu@3 {
 			compatible = "arm,cortex-a7";
 			device_type = "cpu";
 			reg = <3>;
+			operating-points-v2 = <&cpu0_opp_table>;
 		};
 	};
 

From d82c0d12c92705ef468683c9b7a8298dd61ed191 Mon Sep 17 00:00:00 2001
From: Marcelo Henrique Cerri <marcelo.cerri@canonical.com>
Date: Mon, 13 Mar 2017 12:14:58 -0300
Subject: [PATCH 030/410] s390/decompressor: fix initrd corruption caused by
 bss clear

Reorder the operations in decompress_kernel() to ensure initrd is moved
to a safe location before the bss section is zeroed.

During decompression bss can overlap with the initrd and this can
corrupt the initrd contents depending on the size of the compressed
kernel (which affects where the initrd is placed by the bootloader) and
the size of the bss section of the decompressor.

Also use the correct initrd size when checking for overlaps with
parmblock.

Fixes: 06c0dd72aea3 ([S390] fix boot failures with compressed kernels)
Cc: stable@vger.kernel.org
Reviewed-by: Joy Latten <joy.latten@canonical.com>
Reviewed-by: Vineetha HariPai <vineetha.hari.pai@canonical.com>
Signed-off-by: Marcelo Henrique Cerri <marcelo.cerri@canonical.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/boot/compressed/misc.c | 35 +++++++++++++++++---------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
index fa95041fa9f6..33ca29333e18 100644
--- a/arch/s390/boot/compressed/misc.c
+++ b/arch/s390/boot/compressed/misc.c
@@ -141,31 +141,34 @@ static void check_ipl_parmblock(void *start, unsigned long size)
 
 unsigned long decompress_kernel(void)
 {
-	unsigned long output_addr;
-	unsigned char *output;
+	void *output, *kernel_end;
 
-	output_addr = ((unsigned long) &_end + HEAP_SIZE + 4095UL) & -4096UL;
-	check_ipl_parmblock((void *) 0, output_addr + SZ__bss_start);
-	memset(&_bss, 0, &_ebss - &_bss);
-	free_mem_ptr = (unsigned long)&_end;
-	free_mem_end_ptr = free_mem_ptr + HEAP_SIZE;
-	output = (unsigned char *) output_addr;
+	output = (void *) ALIGN((unsigned long) &_end + HEAP_SIZE, PAGE_SIZE);
+	kernel_end = output + SZ__bss_start;
+	check_ipl_parmblock((void *) 0, (unsigned long) kernel_end);
 
 #ifdef CONFIG_BLK_DEV_INITRD
 	/*
 	 * Move the initrd right behind the end of the decompressed
-	 * kernel image.
+	 * kernel image. This also prevents initrd corruption caused by
+	 * bss clearing since kernel_end will always be located behind the
+	 * current bss section..
 	 */
-	if (INITRD_START && INITRD_SIZE &&
-	    INITRD_START < (unsigned long) output + SZ__bss_start) {
-		check_ipl_parmblock(output + SZ__bss_start,
-				    INITRD_START + INITRD_SIZE);
-		memmove(output + SZ__bss_start,
-			(void *) INITRD_START, INITRD_SIZE);
-		INITRD_START = (unsigned long) output + SZ__bss_start;
+	if (INITRD_START && INITRD_SIZE && kernel_end > (void *) INITRD_START) {
+		check_ipl_parmblock(kernel_end, INITRD_SIZE);
+		memmove(kernel_end, (void *) INITRD_START, INITRD_SIZE);
+		INITRD_START = (unsigned long) kernel_end;
 	}
 #endif
 
+	/*
+	 * Clear bss section. free_mem_ptr and free_mem_end_ptr need to be
+	 * initialized afterwards since they reside in bss.
+	 */
+	memset(&_bss, 0, &_ebss - &_bss);
+	free_mem_ptr = (unsigned long) &_end;
+	free_mem_end_ptr = free_mem_ptr + HEAP_SIZE;
+
 	puts("Uncompressing Linux... ");
 	__decompress(input_data, input_len, NULL, NULL, output, 0, NULL, error);
 	puts("Ok, booting the kernel.\n");

From 0861b5a754fd5539f527b445aaa61538185c8264 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 16 Mar 2017 11:02:36 +0100
Subject: [PATCH 031/410] s390/smp: fix ipl from cpu with non-zero address

Commit af51160ebd3c ("s390/smp: initialize cpu_present_mask in
setup_arch") initializes the cpu_present_mask much earlier than
before. However the cpu detection code relies on the fact that iff
logical cpu 0 is marked present then also the corresponding physical
cpu address within the pcpu_devices array slot is valid.

Since commit 44fd22992cb7 ("[PATCH] Register the boot-cpu in the cpu
maps earlier") this assumption is not true anymore. The patch marks
logical cpu 0 as present in common code without that architecture code
had a chance to setup the logical to physical map.

With that change the cpu detection code assumes that the physical cpu
address of cpu 0 is also 0, which is not necessarily true.
Subsequently the physical cpu address of the ipl cpu will be mapped to
a different logical cpu. If that cpu is brought online later the ipl
cpu will send itself an initial cpu reset sigp signal. This in turn
completely resets the ipl cpu and the system stops working.

A dump of such a system looks like a "store status" has been
forgotten. But actually the kernel itself removed all traces which
would allow to easily tell what went wrong.

To fix this initialize the logical to physical cpu address already in
smp_setup_processor_id(). In addition remove the initialization of the
cpu_present_mask and cpu_online_mask for cpu 0, since that has already
been done. Also add a sanity check, just in case common code will be
changed again...

The problem can be easily reproduced within a z/VM guest:

> chcpu -d 0
> vmcp ipl

Fixes: af51160ebd3c ("s390/smp: initialize cpu_present_mask in setup_arch")
Reported-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/smp.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 47a973b5b4f1..5dab859b0d54 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -909,13 +909,11 @@ void __init smp_prepare_boot_cpu(void)
 {
 	struct pcpu *pcpu = pcpu_devices;
 
+	WARN_ON(!cpu_present(0) || !cpu_online(0));
 	pcpu->state = CPU_STATE_CONFIGURED;
-	pcpu->address = stap();
 	pcpu->lowcore = (struct lowcore *)(unsigned long) store_prefix();
 	S390_lowcore.percpu_offset = __per_cpu_offset[0];
 	smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
-	set_cpu_present(0, true);
-	set_cpu_online(0, true);
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
@@ -924,6 +922,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 void __init smp_setup_processor_id(void)
 {
+	pcpu_devices[0].address = stap();
 	S390_lowcore.cpu_nr = 0;
 	S390_lowcore.spinlock_lockval = arch_spin_lockval(0);
 }

From ca681ec860d6533b8debda4f6ab2e70e6d519d89 Mon Sep 17 00:00:00 2001
From: Harald Freudenberger <freude@linux.vnet.ibm.com>
Date: Wed, 15 Mar 2017 10:58:07 +0100
Subject: [PATCH 032/410] s390/pkey: Fix wrong handling of secure key with old
 MKVP

When a secure key with an old Master Key Verification
Pattern was given to the pkey_findcard function, there was
no responsible card found because only the current MKVP of
each card was compared. With this fix also the old MKVP
values are considered and so a matching card able to handle
the key is reported back to the caller.

Signed-off-by: Harald Freudenberger <freude@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/crypto/pkey_api.c | 53 ++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 19 deletions(-)

diff --git a/drivers/s390/crypto/pkey_api.c b/drivers/s390/crypto/pkey_api.c
index 40f1136f5568..058db724b5a2 100644
--- a/drivers/s390/crypto/pkey_api.c
+++ b/drivers/s390/crypto/pkey_api.c
@@ -572,6 +572,12 @@ int pkey_sec2protkey(u16 cardnr, u16 domain,
 		rc = -EIO;
 		goto out;
 	}
+	if (prepcblk->ccp_rscode != 0) {
+		DEBUG_WARN(
+			"pkey_sec2protkey unwrap secure key warning, card response %d/%d\n",
+			(int) prepcblk->ccp_rtcode,
+			(int) prepcblk->ccp_rscode);
+	}
 
 	/* process response cprb param block */
 	prepcblk->rpl_parmb = ((u8 *) prepcblk) + sizeof(struct CPRBX);
@@ -761,9 +767,10 @@ out:
 }
 
 /*
- * Fetch just the mkvp value via query_crypto_facility from adapter.
+ * Fetch the current and old mkvp values via
+ * query_crypto_facility from adapter.
  */
-static int fetch_mkvp(u16 cardnr, u16 domain, u64 *mkvp)
+static int fetch_mkvp(u16 cardnr, u16 domain, u64 mkvp[2])
 {
 	int rc, found = 0;
 	size_t rlen, vlen;
@@ -779,9 +786,10 @@ static int fetch_mkvp(u16 cardnr, u16 domain, u64 *mkvp)
 	rc = query_crypto_facility(cardnr, domain, "STATICSA",
 				   rarray, &rlen, varray, &vlen);
 	if (rc == 0 && rlen > 8*8 && vlen > 184+8) {
-		if (rarray[64] == '2') {
+		if (rarray[8*8] == '2') {
 			/* current master key state is valid */
-			*mkvp = *((u64 *)(varray + 184));
+			mkvp[0] = *((u64 *)(varray + 184));
+			mkvp[1] = *((u64 *)(varray + 172));
 			found = 1;
 		}
 	}
@@ -796,14 +804,14 @@ struct mkvp_info {
 	struct list_head list;
 	u16 cardnr;
 	u16 domain;
-	u64 mkvp;
+	u64 mkvp[2];
 };
 
 /* a list with mkvp_info entries */
 static LIST_HEAD(mkvp_list);
 static DEFINE_SPINLOCK(mkvp_list_lock);
 
-static int mkvp_cache_fetch(u16 cardnr, u16 domain, u64 *mkvp)
+static int mkvp_cache_fetch(u16 cardnr, u16 domain, u64 mkvp[2])
 {
 	int rc = -ENOENT;
 	struct mkvp_info *ptr;
@@ -812,7 +820,7 @@ static int mkvp_cache_fetch(u16 cardnr, u16 domain, u64 *mkvp)
 	list_for_each_entry(ptr, &mkvp_list, list) {
 		if (ptr->cardnr == cardnr &&
 		    ptr->domain == domain) {
-			*mkvp = ptr->mkvp;
+			memcpy(mkvp, ptr->mkvp, 2 * sizeof(u64));
 			rc = 0;
 			break;
 		}
@@ -822,7 +830,7 @@ static int mkvp_cache_fetch(u16 cardnr, u16 domain, u64 *mkvp)
 	return rc;
 }
 
-static void mkvp_cache_update(u16 cardnr, u16 domain, u64 mkvp)
+static void mkvp_cache_update(u16 cardnr, u16 domain, u64 mkvp[2])
 {
 	int found = 0;
 	struct mkvp_info *ptr;
@@ -831,7 +839,7 @@ static void mkvp_cache_update(u16 cardnr, u16 domain, u64 mkvp)
 	list_for_each_entry(ptr, &mkvp_list, list) {
 		if (ptr->cardnr == cardnr &&
 		    ptr->domain == domain) {
-			ptr->mkvp = mkvp;
+			memcpy(ptr->mkvp, mkvp, 2 * sizeof(u64));
 			found = 1;
 			break;
 		}
@@ -844,7 +852,7 @@ static void mkvp_cache_update(u16 cardnr, u16 domain, u64 mkvp)
 		}
 		ptr->cardnr = cardnr;
 		ptr->domain = domain;
-		ptr->mkvp = mkvp;
+		memcpy(ptr->mkvp, mkvp, 2 * sizeof(u64));
 		list_add(&ptr->list, &mkvp_list);
 	}
 	spin_unlock_bh(&mkvp_list_lock);
@@ -888,8 +896,8 @@ int pkey_findcard(const struct pkey_seckey *seckey,
 	struct secaeskeytoken *t = (struct secaeskeytoken *) seckey;
 	struct zcrypt_device_matrix *device_matrix;
 	u16 card, dom;
-	u64 mkvp;
-	int i, rc;
+	u64 mkvp[2];
+	int i, rc, oi = -1;
 
 	/* mkvp must not be zero */
 	if (t->mkvp == 0)
@@ -910,14 +918,14 @@ int pkey_findcard(const struct pkey_seckey *seckey,
 		    device_matrix->device[i].functions & 0x04) {
 			/* an enabled CCA Coprocessor card */
 			/* try cached mkvp */
-			if (mkvp_cache_fetch(card, dom, &mkvp) == 0 &&
-			    t->mkvp == mkvp) {
+			if (mkvp_cache_fetch(card, dom, mkvp) == 0 &&
+			    t->mkvp == mkvp[0]) {
 				if (!verify)
 					break;
 				/* verify: fetch mkvp from adapter */
-				if (fetch_mkvp(card, dom, &mkvp) == 0) {
+				if (fetch_mkvp(card, dom, mkvp) == 0) {
 					mkvp_cache_update(card, dom, mkvp);
-					if (t->mkvp == mkvp)
+					if (t->mkvp == mkvp[0])
 						break;
 				}
 			}
@@ -936,14 +944,21 @@ int pkey_findcard(const struct pkey_seckey *seckey,
 			card = AP_QID_CARD(device_matrix->device[i].qid);
 			dom = AP_QID_QUEUE(device_matrix->device[i].qid);
 			/* fresh fetch mkvp from adapter */
-			if (fetch_mkvp(card, dom, &mkvp) == 0) {
+			if (fetch_mkvp(card, dom, mkvp) == 0) {
 				mkvp_cache_update(card, dom, mkvp);
-				if (t->mkvp == mkvp)
+				if (t->mkvp == mkvp[0])
 					break;
+				if (t->mkvp == mkvp[1] && oi < 0)
+					oi = i;
 			}
 		}
+		if (i >= MAX_ZDEV_ENTRIES && oi >= 0) {
+			/* old mkvp matched, use this card then */
+			card = AP_QID_CARD(device_matrix->device[oi].qid);
+			dom = AP_QID_QUEUE(device_matrix->device[oi].qid);
+		}
 	}
-	if (i < MAX_ZDEV_ENTRIES) {
+	if (i < MAX_ZDEV_ENTRIES || oi >= 0) {
 		if (pcardnr)
 			*pcardnr = card;
 		if (pdomain)

From db8466c581cca1a08b505f1319c3ecd246f16fa8 Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@imgtec.com>
Date: Tue, 21 Mar 2017 14:52:25 +0000
Subject: [PATCH 033/410] MIPS: IRQ Stack: Unwind IRQ stack onto task stack

When the separate IRQ stack was introduced, stack unwinding only
proceeded as far as the top of the IRQ stack, leading to kernel
backtraces being less useful, lacking the trace of what was interrupted.

Fix this by providing a means for the kernel to unwind the IRQ stack
onto the interrupted task stack. The processor state is saved to the
kernel task stack on interrupt. The IRQ_STACK_START macro reserves an
unsigned long at the top of the IRQ stack where the interrupted task
stack pointer can be saved. After the active stack is switched to the
IRQ stack, save the interrupted tasks stack pointer to the reserved
location.

Fix the stack unwinding code to look for the frame being the top of the
IRQ stack and if so get the next frame from the saved location. The
existing test does not work with the separate stack since the ra is no
longer pointed at ret_from_{irq,exception}.

The test to stop unwinding the stack 32 bytes from the top of a stack
must be modified to allow unwinding to continue up to the location of
the saved task stack pointer when on the IRQ stack. The low / high marks
of the stack are set depending on whether the sp is on an irq stack or
not.

Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
Cc: Masanari Iida <standby24x7@gmail.com>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jason A. Donenfeld <jason@zx2c4.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/15788/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
---
 arch/mips/include/asm/irq.h    | 15 +++++++++
 arch/mips/kernel/asm-offsets.c |  1 +
 arch/mips/kernel/genex.S       |  8 +++--
 arch/mips/kernel/process.c     | 56 +++++++++++++++++++++++-----------
 4 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index 956db6e201d1..ddd1c918103b 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -18,9 +18,24 @@
 #include <irq.h>
 
 #define IRQ_STACK_SIZE			THREAD_SIZE
+#define IRQ_STACK_START			(IRQ_STACK_SIZE - sizeof(unsigned long))
 
 extern void *irq_stack[NR_CPUS];
 
+/*
+ * The highest address on the IRQ stack contains a dummy frame put down in
+ * genex.S (handle_int & except_vec_vi_handler) which is structured as follows:
+ *
+ *   top ------------
+ *       | task sp  | <- irq_stack[cpu] + IRQ_STACK_START
+ *       ------------
+ *       |          | <- First frame of IRQ context
+ *       ------------
+ *
+ * task sp holds a copy of the task stack pointer where the struct pt_regs
+ * from exception entry can be found.
+ */
+
 static inline bool on_irq_stack(int cpu, unsigned long sp)
 {
 	unsigned long low = (unsigned long)irq_stack[cpu];
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index bb5c5d34ba81..a670c0c11875 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -102,6 +102,7 @@ void output_thread_info_defines(void)
 	DEFINE(_THREAD_SIZE, THREAD_SIZE);
 	DEFINE(_THREAD_MASK, THREAD_MASK);
 	DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE);
+	DEFINE(_IRQ_STACK_START, IRQ_STACK_START);
 	BLANK();
 }
 
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index 2ac6c2625c13..ae810da4d499 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -215,9 +215,11 @@ NESTED(handle_int, PT_SIZE, sp)
 	beq	t0, t1, 2f
 
 	/* Switch to IRQ stack */
-	li	t1, _IRQ_STACK_SIZE
+	li	t1, _IRQ_STACK_START
 	PTR_ADD sp, t0, t1
 
+	/* Save task's sp on IRQ stack so that unwinding can follow it */
+	LONG_S	s1, 0(sp)
 2:
 	jal	plat_irq_dispatch
 
@@ -325,9 +327,11 @@ NESTED(except_vec_vi_handler, 0, sp)
 	beq	t0, t1, 2f
 
 	/* Switch to IRQ stack */
-	li	t1, _IRQ_STACK_SIZE
+	li	t1, _IRQ_STACK_START
 	PTR_ADD sp, t0, t1
 
+	/* Save task's sp on IRQ stack so that unwinding can follow it */
+	LONG_S	s1, 0(sp)
 2:
 	jalr	v0
 
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index fb6b6b650719..b68e10fc453d 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -488,31 +488,52 @@ unsigned long notrace unwind_stack_by_address(unsigned long stack_page,
 					      unsigned long pc,
 					      unsigned long *ra)
 {
+	unsigned long low, high, irq_stack_high;
 	struct mips_frame_info info;
 	unsigned long size, ofs;
+	struct pt_regs *regs;
 	int leaf;
-	extern void ret_from_irq(void);
-	extern void ret_from_exception(void);
 
 	if (!stack_page)
 		return 0;
 
 	/*
-	 * If we reached the bottom of interrupt context,
-	 * return saved pc in pt_regs.
+	 * IRQ stacks start at IRQ_STACK_START
+	 * task stacks at THREAD_SIZE - 32
 	 */
-	if (pc == (unsigned long)ret_from_irq ||
-	    pc == (unsigned long)ret_from_exception) {
-		struct pt_regs *regs;
-		if (*sp >= stack_page &&
-		    *sp + sizeof(*regs) <= stack_page + THREAD_SIZE - 32) {
-			regs = (struct pt_regs *)*sp;
-			pc = regs->cp0_epc;
-			if (!user_mode(regs) && __kernel_text_address(pc)) {
-				*sp = regs->regs[29];
-				*ra = regs->regs[31];
-				return pc;
-			}
+	low = stack_page;
+	if (!preemptible() && on_irq_stack(raw_smp_processor_id(), *sp)) {
+		high = stack_page + IRQ_STACK_START;
+		irq_stack_high = high;
+	} else {
+		high = stack_page + THREAD_SIZE - 32;
+		irq_stack_high = 0;
+	}
+
+	/*
+	 * If we reached the top of the interrupt stack, start unwinding
+	 * the interrupted task stack.
+	 */
+	if (unlikely(*sp == irq_stack_high)) {
+		unsigned long task_sp = *(unsigned long *)*sp;
+
+		/*
+		 * Check that the pointer saved in the IRQ stack head points to
+		 * something within the stack of the current task
+		 */
+		if (!object_is_on_stack((void *)task_sp))
+			return 0;
+
+		/*
+		 * Follow pointer to tasks kernel stack frame where interrupted
+		 * state was saved.
+		 */
+		regs = (struct pt_regs *)task_sp;
+		pc = regs->cp0_epc;
+		if (!user_mode(regs) && __kernel_text_address(pc)) {
+			*sp = regs->regs[29];
+			*ra = regs->regs[31];
+			return pc;
 		}
 		return 0;
 	}
@@ -533,8 +554,7 @@ unsigned long notrace unwind_stack_by_address(unsigned long stack_page,
 	if (leaf < 0)
 		return 0;
 
-	if (*sp < stack_page ||
-	    *sp + info.frame_size > stack_page + THREAD_SIZE - 32)
+	if (*sp < low || *sp + info.frame_size > high)
 		return 0;
 
 	if (leaf)

From 2c422257550f123049552b39f7af6e3428a60f43 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 21 Mar 2017 13:32:37 +0100
Subject: [PATCH 034/410] netfilter: nfnl_cthelper: fix runtime expectation
 policy updates

We only allow runtime updates of expectation policies for timeout and
maximum number of expectations, otherwise reject the update.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Liping Zhang <zlpnobody@gmail.com>
---
 net/netfilter/nfnetlink_cthelper.c | 86 +++++++++++++++++++++++++++++-
 1 file changed, 84 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 3cd41d105407..90f291e27eb1 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -255,6 +255,89 @@ err:
 	return ret;
 }
 
+static int
+nfnl_cthelper_update_policy_one(const struct nf_conntrack_expect_policy *policy,
+				struct nf_conntrack_expect_policy *new_policy,
+				const struct nlattr *attr)
+{
+	struct nlattr *tb[NFCTH_POLICY_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr,
+			       nfnl_cthelper_expect_pol);
+	if (err < 0)
+		return err;
+
+	if (!tb[NFCTH_POLICY_NAME] ||
+	    !tb[NFCTH_POLICY_EXPECT_MAX] ||
+	    !tb[NFCTH_POLICY_EXPECT_TIMEOUT])
+		return -EINVAL;
+
+	if (nla_strcmp(tb[NFCTH_POLICY_NAME], policy->name))
+		return -EBUSY;
+
+	new_policy->max_expected =
+		ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX]));
+	new_policy->timeout =
+		ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT]));
+
+	return 0;
+}
+
+static int nfnl_cthelper_update_policy_all(struct nlattr *tb[],
+					   struct nf_conntrack_helper *helper)
+{
+	struct nf_conntrack_expect_policy new_policy[helper->expect_class_max + 1];
+	struct nf_conntrack_expect_policy *policy;
+	int i, err;
+
+	/* Check first that all policy attributes are well-formed, so we don't
+	 * leave things in inconsistent state on errors.
+	 */
+	for (i = 0; i < helper->expect_class_max + 1; i++) {
+
+		if (!tb[NFCTH_POLICY_SET + i])
+			return -EINVAL;
+
+		err = nfnl_cthelper_update_policy_one(&helper->expect_policy[i],
+						      &new_policy[i],
+						      tb[NFCTH_POLICY_SET + i]);
+		if (err < 0)
+			return err;
+	}
+	/* Now we can safely update them. */
+	for (i = 0; i < helper->expect_class_max + 1; i++) {
+		policy = (struct nf_conntrack_expect_policy *)
+				&helper->expect_policy[i];
+		policy->max_expected = new_policy->max_expected;
+		policy->timeout	= new_policy->timeout;
+	}
+
+	return 0;
+}
+
+static int nfnl_cthelper_update_policy(struct nf_conntrack_helper *helper,
+				       const struct nlattr *attr)
+{
+	struct nlattr *tb[NFCTH_POLICY_SET_MAX + 1];
+	unsigned int class_max;
+	int err;
+
+	err = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
+			       nfnl_cthelper_expect_policy_set);
+	if (err < 0)
+		return err;
+
+	if (!tb[NFCTH_POLICY_SET_NUM])
+		return -EINVAL;
+
+	class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM]));
+	if (helper->expect_class_max + 1 != class_max)
+		return -EBUSY;
+
+	return nfnl_cthelper_update_policy_all(tb, helper);
+}
+
 static int
 nfnl_cthelper_update(const struct nlattr * const tb[],
 		     struct nf_conntrack_helper *helper)
@@ -265,8 +348,7 @@ nfnl_cthelper_update(const struct nlattr * const tb[],
 		return -EBUSY;
 
 	if (tb[NFCTH_POLICY]) {
-		ret = nfnl_cthelper_parse_expect_policy(helper,
-							tb[NFCTH_POLICY]);
+		ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]);
 		if (ret < 0)
 			return ret;
 	}

From f83bf8da1135ca635aac8f062cad3f001fcf3a26 Mon Sep 17 00:00:00 2001
From: Jeffy Chen <jeffy.chen@rock-chips.com>
Date: Tue, 21 Mar 2017 15:07:10 +0800
Subject: [PATCH 035/410] netfilter: nfnl_cthelper: Fix memory leak

We have memory leaks of nf_conntrack_helper & expect_policy.

Signed-off-by: Jeffy Chen <jeffy.chen@rock-chips.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_cthelper.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 90f291e27eb1..2b987d2a77bc 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -216,7 +216,7 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
 
 	ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]);
 	if (ret < 0)
-		goto err;
+		goto err1;
 
 	strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN);
 	helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
@@ -247,10 +247,12 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
 
 	ret = nf_conntrack_helper_register(helper);
 	if (ret < 0)
-		goto err;
+		goto err2;
 
 	return 0;
-err:
+err2:
+	kfree(helper->expect_policy);
+err1:
 	kfree(helper);
 	return ret;
 }
@@ -696,6 +698,8 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
 
 			found = true;
 			nf_conntrack_helper_unregister(cur);
+			kfree(cur->expect_policy);
+			kfree(cur);
 		}
 	}
 	/* Make sure we return success if we flush and there is no helpers */
@@ -759,6 +763,8 @@ static void __exit nfnl_cthelper_exit(void)
 				continue;
 
 			nf_conntrack_helper_unregister(cur);
+			kfree(cur->expect_policy);
+			kfree(cur);
 		}
 	}
 }

From 06e1a5cc570703796ff1bd3a712e8e3b15c6bb0d Mon Sep 17 00:00:00 2001
From: Adam Ford <aford173@gmail.com>
Date: Mon, 6 Mar 2017 12:56:55 -0600
Subject: [PATCH 036/410] ARM: dts: OMAP3: Fix MFG ID EEPROM

The manufacturing information is stored in the EEPROM.  This chip
is an AT24C64 not not (nor has it ever been) 24C02.  This patch will
correctly address the EEPROM to read the entire contents and not just
256 bytes (of 0xff).

Fixes: 5e3447a29a38 ("ARM: dts: LogicPD Torpedo: Add AT24 EEPROM Support")

Signed-off-by: Adam Ford <aford173@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/logicpd-torpedo-som.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
index 8f9a69ca818c..efe53998c961 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
+++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
@@ -121,7 +121,7 @@
 &i2c3 {
 	clock-frequency = <400000>;
 	at24@50 {
-		compatible = "at24,24c02";
+		compatible = "atmel,24c64";
 		readonly;
 		reg = <0x50>;
 	};

From ce2899428ec0249512023aecaa3530c29f775a4f Mon Sep 17 00:00:00 2001
From: Yegor Yefremov <yegorslists@googlemail.com>
Date: Mon, 13 Mar 2017 10:03:14 +0100
Subject: [PATCH 037/410] ARM: dts: am335x-baltos: disable EEE for Atheros 8035
 PHY

Though cpsw doesn't support EEE feature, Atheros 8035 provides
automatic EEE support that is enabled by default. This causes
occasional link drops when link partner also announces EEE support.
These link drops occur on both 100Mbit/s and 1000Mbit/s speeds.
So disable EEE advertising completely.

Signed-off-by: Yegor Yefremov <yegorslists@googlemail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/am335x-baltos.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/boot/dts/am335x-baltos.dtsi b/arch/arm/boot/dts/am335x-baltos.dtsi
index efb5eae290a8..d42b98f15e8b 100644
--- a/arch/arm/boot/dts/am335x-baltos.dtsi
+++ b/arch/arm/boot/dts/am335x-baltos.dtsi
@@ -371,6 +371,8 @@
 
 	phy1: ethernet-phy@1 {
 		reg = <7>;
+		eee-broken-100tx;
+		eee-broken-1000t;
 	};
 };
 

From 7d79f6098d82f8c09914d7799bc96891ad9c3baf Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 21 Mar 2017 21:03:01 -0500
Subject: [PATCH 038/410] ARM: dts: ti: fix PCI bus dtc warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

dtc recently added PCI bus checks. Fix these warnings.

Signed-off-by: Rob Herring <robh@kernel.org>
Cc: "Benoît Cousson" <bcousson@baylibre.com>
Cc: Tony Lindgren <tony@atomide.com>
Cc: linux-omap@vger.kernel.org
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/dra7.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 2c9e56f4aac5..bbfb9d5a70a9 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -283,6 +283,7 @@
 				device_type = "pci";
 				ranges = <0x81000000 0 0          0x03000 0 0x00010000
 					  0x82000000 0 0x20013000 0x13000 0 0xffed000>;
+				bus-range = <0x00 0xff>;
 				#interrupt-cells = <1>;
 				num-lanes = <1>;
 				linux,pci-domain = <0>;
@@ -319,6 +320,7 @@
 				device_type = "pci";
 				ranges = <0x81000000 0 0          0x03000 0 0x00010000
 					  0x82000000 0 0x30013000 0x13000 0 0xffed000>;
+				bus-range = <0x00 0xff>;
 				#interrupt-cells = <1>;
 				num-lanes = <1>;
 				linux,pci-domain = <1>;

From f35ec35e5e726e7137deaee0dec24f8cbf07a48f Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 22 Mar 2017 22:40:30 -0700
Subject: [PATCH 039/410] net: phy: Export mdiobus_register_board_info()

We can build modular code that uses mdiobus_register_board_info() which would
lead to linking failure since this symbol is not expoerted.

Fixes: 648ea0134069 ("net: phy: Allow pre-declaration of MDIO devices")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-boardinfo.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/phy/mdio-boardinfo.c b/drivers/net/phy/mdio-boardinfo.c
index 6b988f77da08..61941e29daae 100644
--- a/drivers/net/phy/mdio-boardinfo.c
+++ b/drivers/net/phy/mdio-boardinfo.c
@@ -84,3 +84,4 @@ int mdiobus_register_board_info(const struct mdio_board_info *info,
 
 	return 0;
 }
+EXPORT_SYMBOL(mdiobus_register_board_info);

From 48481c8fa16410ffa45939b13b6c53c2ca609e5f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 23 Mar 2017 12:39:21 -0700
Subject: [PATCH 040/410] net: neigh: guard against NULL solicit() method

Dmitry posted a nice reproducer of a bug triggering in neigh_probe()
when dereferencing a NULL neigh->ops->solicit method.

This can happen for arp_direct_ops/ndisc_direct_ops and similar,
which can be used for NUD_NOARP neighbours (created when dev->header_ops
is NULL). Admin can then force changing nud_state to some other state
that would fire neigh timer.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index e7c12caa20c8..4526cbd7e28a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -860,7 +860,8 @@ static void neigh_probe(struct neighbour *neigh)
 	if (skb)
 		skb = skb_clone(skb, GFP_ATOMIC);
 	write_unlock(&neigh->lock);
-	neigh->ops->solicit(neigh, skb);
+	if (neigh->ops->solicit)
+		neigh->ops->solicit(neigh, skb);
 	atomic_inc(&neigh->probes);
 	kfree_skb(skb);
 }

From a95600294157ca7527ee7c70249fb53e09d8c566 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Thu, 26 Jan 2017 14:43:32 +0200
Subject: [PATCH 041/410] iwlwifi: mvm: fix accessing fw_id_to_mac_id

Access should be by rcu_dereference. Issue was found by sparse.

Fixes: 65e254821cee ("iwlwifi: mvm: use firmware station PM notification for AP_LINK_PS")
Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 6927caecd48e..486dcceed17a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -2401,7 +2401,7 @@ void iwl_mvm_sta_pm_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
 		return;
 
 	rcu_read_lock();
-	sta = mvm->fw_id_to_mac_id[notif->sta_id];
+	sta = rcu_dereference(mvm->fw_id_to_mac_id[notif->sta_id]);
 	if (WARN_ON(IS_ERR_OR_NULL(sta))) {
 		rcu_read_unlock();
 		return;

From 251fe09f13bfb54c1ede66ee8bf8ddd0061c4f7c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 23 Mar 2017 13:40:00 +0300
Subject: [PATCH 042/410] iwlwifi: mvm: writing zero bytes to debugfs causes a
 crash

This is a static analysis fix.  The warning is:

	drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c:912 iwl_mvm_fw_dbg_collect()
	warn: integer overflows 'sizeof(*desc) + len'

I guess this code is supposed to take a NUL character, but if we write
zero bytes then it tries to write -1 characters and crashes.

Fixes: c91b865cb14d ("iwlwifi: mvm: support description for user triggered fw dbg collection")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
index a260cd503200..077bfd8f4c0c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
@@ -1056,6 +1056,8 @@ static ssize_t iwl_dbgfs_fw_dbg_collect_write(struct iwl_mvm *mvm,
 
 	if (ret)
 		return ret;
+	if (count == 0)
+		return 0;
 
 	iwl_mvm_fw_dbg_collect(mvm, FW_DBG_TRIGGER_USER, buf,
 			       (count - 1), NULL);

From 4d339989acd730f17bc814b5ddb9c54e405766b6 Mon Sep 17 00:00:00 2001
From: Liad Kaufman <liad.kaufman@intel.com>
Date: Tue, 21 Mar 2017 17:13:16 +0200
Subject: [PATCH 043/410] iwlwifi: mvm: support ibss in dqa mode

Allow working IBSS also when working in DQA mode.
This is done by setting it to treat the queues the
same as a BSS AP treats the queues.

Fixes: 7948b87308a4 ("iwlwifi: mvm: enable dynamic queue allocation mode")
Signed-off-by: Liad Kaufman <liad.kaufman@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 3 ++-
 drivers/net/wireless/intel/iwlwifi/mvm/sta.c      | 9 ++++++---
 drivers/net/wireless/intel/iwlwifi/mvm/tx.c       | 7 +++++--
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
index 99132ea16ede..c5734e1a02d2 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
@@ -216,7 +216,8 @@ u32 iwl_mvm_mac_get_queues_mask(struct ieee80211_vif *vif)
 			qmask |= BIT(vif->hw_queue[ac]);
 	}
 
-	if (vif->type == NL80211_IFTYPE_AP)
+	if (vif->type == NL80211_IFTYPE_AP ||
+	    vif->type == NL80211_IFTYPE_ADHOC)
 		qmask |= BIT(vif->cab_queue);
 
 	return qmask;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index b51a2853cc80..9d28db7f56aa 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -1806,7 +1806,8 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 			iwl_mvm_get_wd_timeout(mvm, vif, false, false);
 		int queue;
 
-		if (vif->type == NL80211_IFTYPE_AP)
+		if (vif->type == NL80211_IFTYPE_AP ||
+		    vif->type == NL80211_IFTYPE_ADHOC)
 			queue = IWL_MVM_DQA_AP_PROBE_RESP_QUEUE;
 		else if (vif->type == NL80211_IFTYPE_P2P_DEVICE)
 			queue = IWL_MVM_DQA_P2P_DEVICE_QUEUE;
@@ -1837,7 +1838,8 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 	 * enabled-cab_queue to the mask)
 	 */
 	if (iwl_mvm_is_dqa_supported(mvm) &&
-	    vif->type == NL80211_IFTYPE_AP) {
+	    (vif->type == NL80211_IFTYPE_AP ||
+	     vif->type == NL80211_IFTYPE_ADHOC)) {
 		struct iwl_trans_txq_scd_cfg cfg = {
 			.fifo = IWL_MVM_TX_FIFO_MCAST,
 			.sta_id = mvmvif->bcast_sta.sta_id,
@@ -1862,7 +1864,8 @@ static void iwl_mvm_free_bcast_sta_queues(struct iwl_mvm *mvm,
 
 	lockdep_assert_held(&mvm->mutex);
 
-	if (vif->type == NL80211_IFTYPE_AP)
+	if (vif->type == NL80211_IFTYPE_AP ||
+	    vif->type == NL80211_IFTYPE_ADHOC)
 		iwl_mvm_disable_txq(mvm, vif->cab_queue, vif->cab_queue,
 				    IWL_MAX_TID_COUNT, 0);
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index 3f37075f4cde..1ba0a6f55503 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -506,6 +506,7 @@ static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm,
 
 	switch (info->control.vif->type) {
 	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_ADHOC:
 		/*
 		 * Handle legacy hostapd as well, where station may be added
 		 * only after assoc. Take care of the case where we send a
@@ -517,7 +518,8 @@ static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm,
 		if (info->hw_queue == info->control.vif->cab_queue)
 			return info->hw_queue;
 
-		WARN_ONCE(1, "fc=0x%02x", le16_to_cpu(fc));
+		WARN_ONCE(info->control.vif->type != NL80211_IFTYPE_ADHOC,
+			  "fc=0x%02x", le16_to_cpu(fc));
 		return IWL_MVM_DQA_AP_PROBE_RESP_QUEUE;
 	case NL80211_IFTYPE_P2P_DEVICE:
 		if (ieee80211_is_mgmt(fc))
@@ -584,7 +586,8 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
 			iwl_mvm_vif_from_mac80211(info.control.vif);
 
 		if (info.control.vif->type == NL80211_IFTYPE_P2P_DEVICE ||
-		    info.control.vif->type == NL80211_IFTYPE_AP) {
+		    info.control.vif->type == NL80211_IFTYPE_AP ||
+		    info.control.vif->type == NL80211_IFTYPE_ADHOC) {
 			sta_id = mvmvif->bcast_sta.sta_id;
 			queue = iwl_mvm_get_ctrl_vif_queue(mvm, &info,
 							   hdr->frame_control);

From 49d52e8108a21749dc2114b924c907db43358984 Mon Sep 17 00:00:00 2001
From: Nathan Sullivan <nathan.sullivan@ni.com>
Date: Wed, 22 Mar 2017 15:27:01 -0500
Subject: [PATCH 044/410] net: phy: handle state correctly in phy_stop_machine

If the PHY is halted on stop, then do not set the state to PHY_UP.  This
ensures the phy will be restarted later in phy_start when the machine is
started again.

Fixes: 00db8189d984 ("This patch adds a PHY Abstraction Layer to the Linux Kernel, enabling ethernet drivers to remain as ignorant as is reasonable of the connected PHY's design and operation details.")
Signed-off-by: Nathan Sullivan <nathan.sullivan@ni.com>
Signed-off-by: Brad Mouring <brad.mouring@ni.com>
Acked-by: Xander Huff <xander.huff@ni.com>
Acked-by: Kyle Roeschley <kyle.roeschley@ni.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 1be69d8bc909..a2bfc82e95d7 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -681,7 +681,7 @@ void phy_stop_machine(struct phy_device *phydev)
 	cancel_delayed_work_sync(&phydev->state_queue);
 
 	mutex_lock(&phydev->lock);
-	if (phydev->state > PHY_UP)
+	if (phydev->state > PHY_UP && phydev->state != PHY_HALTED)
 		phydev->state = PHY_UP;
 	mutex_unlock(&phydev->lock);
 }

From 2f25abe6bac573928a990ccbdac75873add8127e Mon Sep 17 00:00:00 2001
From: hayeswang <hayeswang@realtek.com>
Date: Thu, 23 Mar 2017 19:14:19 +0800
Subject: [PATCH 045/410] r8152: prevent the driver from transmitting packets
 with carrier off

The linking status may be changed when autosuspend. And, after
autoresume, the driver may try to transmit packets when the device
is carrier off, because the interrupt transfer doesn't update the
linking status, yet. And, if the device is in ALDPS mode, the device
would stop working.

The another similar case is
 1. unplug the cable.
 2. interrupt transfer queue a work_queue for linking change.
 3. device enters the ALDPS mode.
 4. a tx occurs before the work_queue is called.

Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 0b1b9188625d..c34df33c6d72 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -1294,6 +1294,7 @@ static void intr_callback(struct urb *urb)
 		}
 	} else {
 		if (netif_carrier_ok(tp->netdev)) {
+			netif_stop_queue(tp->netdev);
 			set_bit(RTL8152_LINK_CHG, &tp->flags);
 			schedule_delayed_work(&tp->schedule, 0);
 		}
@@ -3169,6 +3170,9 @@ static void set_carrier(struct r8152 *tp)
 			napi_enable(&tp->napi);
 			netif_wake_queue(netdev);
 			netif_info(tp, link, netdev, "carrier on\n");
+		} else if (netif_queue_stopped(netdev) &&
+			   skb_queue_len(&tp->tx_queue) < tp->tx_qlen) {
+			netif_wake_queue(netdev);
 		}
 	} else {
 		if (netif_carrier_ok(netdev)) {
@@ -3702,8 +3706,18 @@ static int rtl8152_resume(struct usb_interface *intf)
 			tp->rtl_ops.autosuspend_en(tp, false);
 			napi_disable(&tp->napi);
 			set_bit(WORK_ENABLE, &tp->flags);
-			if (netif_carrier_ok(tp->netdev))
-				rtl_start_rx(tp);
+
+			if (netif_carrier_ok(tp->netdev)) {
+				if (rtl8152_get_speed(tp) & LINK_STATUS) {
+					rtl_start_rx(tp);
+				} else {
+					netif_carrier_off(tp->netdev);
+					tp->rtl_ops.disable(tp);
+					netif_info(tp, link, tp->netdev,
+						   "linking down\n");
+				}
+			}
+
 			napi_enable(&tp->napi);
 			clear_bit(SELECTIVE_SUSPEND, &tp->flags);
 			smp_mb__after_atomic();

From 1adbddef118ae8bf6409c13208645d28c29731c1 Mon Sep 17 00:00:00 2001
From: Pavel Belous <pavel.belous@aquantia.com>
Date: Thu, 23 Mar 2017 14:19:41 +0300
Subject: [PATCH 046/410] net:ethernet:aquantia: Remove adapter re-opening when
 MTU changed.

Closing/opening the adapter is not needed at all.
The new MTU settings take effect immediately.

Fixes: 97bde5c4f909 ("net: ethernet: aquantia: Support for NIC-specific code")

Signed-off-by: Pavel Belous <pavel.belous@aquantia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_main.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
index d05fbfdce5e5..5d6c40d86775 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
@@ -100,11 +100,6 @@ static int aq_ndev_change_mtu(struct net_device *ndev, int new_mtu)
 		goto err_exit;
 	ndev->mtu = new_mtu;
 
-	if (netif_running(ndev)) {
-		aq_ndev_close(ndev);
-		aq_ndev_open(ndev);
-	}
-
 err_exit:
 	return err;
 }

From ea0504f554c8f989eab58549300d15582d36f039 Mon Sep 17 00:00:00 2001
From: Pavel Belous <pavel.belous@aquantia.com>
Date: Thu, 23 Mar 2017 14:19:42 +0300
Subject: [PATCH 047/410] net:ethernet:aquantia: Fix packet type detection
 (TCP/UDP) for IPv6.

In order for the checksum offloads to work correctly we need to set the
packet type bit (TCP/UDP) in the TX context buffer.

Fixes: 97bde5c4f909 ("net: ethernet: aquantia: Support for NIC-specific code")

Signed-off-by: Pavel Belous <pavel.belous@aquantia.com>
Tested-by: David Arcari <darcari@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/aquantia/atlantic/aq_nic.c   | 20 +++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index ee78444bfb88..db2b51da2988 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -510,10 +510,22 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self,
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		dx_buff->is_ip_cso = (htons(ETH_P_IP) == skb->protocol) ?
 			1U : 0U;
-		dx_buff->is_tcp_cso =
-			(ip_hdr(skb)->protocol == IPPROTO_TCP) ? 1U : 0U;
-		dx_buff->is_udp_cso =
-			(ip_hdr(skb)->protocol == IPPROTO_UDP) ? 1U : 0U;
+
+		if (ip_hdr(skb)->version == 4) {
+			dx_buff->is_tcp_cso =
+				(ip_hdr(skb)->protocol == IPPROTO_TCP) ?
+					1U : 0U;
+			dx_buff->is_udp_cso =
+				(ip_hdr(skb)->protocol == IPPROTO_UDP) ?
+					1U : 0U;
+		} else if (ip_hdr(skb)->version == 6) {
+			dx_buff->is_tcp_cso =
+				(ipv6_hdr(skb)->nexthdr == NEXTHDR_TCP) ?
+					1U : 0U;
+			dx_buff->is_udp_cso =
+				(ipv6_hdr(skb)->nexthdr == NEXTHDR_UDP) ?
+					1U : 0U;
+		}
 	}
 
 	for (; nr_frags--; ++frag_count) {

From 9f0dd8c322e89f137c44b437d6fbecc9dea12204 Mon Sep 17 00:00:00 2001
From: Pavel Belous <pavel.belous@aquantia.com>
Date: Thu, 23 Mar 2017 14:19:43 +0300
Subject: [PATCH 048/410] net:ethernet:aquantia: Missing spinlock
 initialization.

Fix for missing initialization aq_ring header.lock spinlock.

Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code")

Signed-off-by: Pavel Belous <pavel.belous@aquantia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 0358e6072d45..3a8a4aa13687 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -101,6 +101,7 @@ int aq_ring_init(struct aq_ring_s *self)
 	self->hw_head = 0;
 	self->sw_head = 0;
 	self->sw_tail = 0;
+	spin_lock_init(&self->header.lock);
 	return 0;
 }
 

From 386aff88e32ec3f82e3f032217bad0c8c8846349 Mon Sep 17 00:00:00 2001
From: Pavel Belous <pavel.belous@aquantia.com>
Date: Thu, 23 Mar 2017 14:19:44 +0300
Subject: [PATCH 049/410] net:ethernet:aquantia: Fix for LSO with IPv6.

Fix Context Command bit: L3 type = "0" for IPv4, "1" for IPv6.

Fixes: bab6de8fd180 ("net: ethernet: aquantia:
 Atlantic A0 and B0 specific functions.")

Signed-off-by: Pavel Belous <pavel.belous@aquantia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c           | 3 +++
 drivers/net/ethernet/aquantia/atlantic/aq_ring.h          | 3 ++-
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 3 +++
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 3 +++
 4 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index db2b51da2988..cdb02991f249 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -487,6 +487,9 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self,
 		dx_buff->mss = skb_shinfo(skb)->gso_size;
 		dx_buff->is_txc = 1U;
 
+		dx_buff->is_ipv6 =
+			(ip_hdr(skb)->version == 6) ? 1U : 0U;
+
 		dx = aq_ring_next_dx(ring, dx);
 		dx_buff = &ring->buff_ring[dx];
 		++ret;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
index 257254645068..eecd6d1c4d73 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
@@ -58,7 +58,8 @@ struct __packed aq_ring_buff_s {
 			u8 len_l2;
 			u8 len_l3;
 			u8 len_l4;
-			u8 rsvd2;
+			u8 is_ipv6:1;
+			u8 rsvd2:7;
 			u32 len_pkt;
 		};
 	};
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index a2b746a2dd50..a536875a7d0d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -433,6 +433,9 @@ static int hw_atl_a0_hw_ring_tx_xmit(struct aq_hw_s *self,
 				    buff->len_l3 +
 				    buff->len_l2);
 			is_gso = true;
+
+			if (buff->is_ipv6)
+				txd->ctl |= HW_ATL_A0_TXD_CTL_CMD_IPV6;
 		} else {
 			buff_pa_len = buff->len;
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index cab2931dab9a..69488c9b03e2 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -471,6 +471,9 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self,
 				    buff->len_l3 +
 				    buff->len_l2);
 			is_gso = true;
+
+			if (buff->is_ipv6)
+				txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_IPV6;
 		} else {
 			buff_pa_len = buff->len;
 

From 5d73bb863c2ef3aa1a28b5885c85ede7307df8ea Mon Sep 17 00:00:00 2001
From: Pavel Belous <pavel.belous@aquantia.com>
Date: Thu, 23 Mar 2017 14:19:45 +0300
Subject: [PATCH 050/410] net:ethernet:aquantia: Reset is_gso flag when EOP
 reached.

We need to reset is_gso flag when EOP reached (entire LSO packet processed).

Fixes: bab6de8fd180 ("net: ethernet: aquantia:
 Atlantic A0 and B0 specific functions.")

Signed-off-by: Pavel Belous <pavel.belous@aquantia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 1 +
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index a536875a7d0d..4ee15ff06a44 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -461,6 +461,7 @@ static int hw_atl_a0_hw_ring_tx_xmit(struct aq_hw_s *self,
 			if (unlikely(buff->is_eop)) {
 				txd->ctl |= HW_ATL_A0_TXD_CTL_EOP;
 				txd->ctl |= HW_ATL_A0_TXD_CTL_CMD_WB;
+				is_gso = false;
 			}
 		}
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 69488c9b03e2..42150708191d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -499,6 +499,7 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self,
 			if (unlikely(buff->is_eop)) {
 				txd->ctl |= HW_ATL_B0_TXD_CTL_EOP;
 				txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_WB;
+				is_gso = false;
 			}
 		}
 

From 7d969d2e8890f546c8cec634b3aa5f57d4eef883 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Thu, 23 Mar 2017 14:55:08 +0100
Subject: [PATCH 051/410] s390/qeth: size calculation outbound buffers

Depending on the device type, hard_start_xmit() builds different output
buffer formats. For instance with HiperSockets, on both L2 and L3 we
strip the ETH header from the skb - L3 doesn't need it, and L2 carries
it in the buffer's header element.
For this, we pass data_offset = ETH_HLEN all the way down to
__qeth_fill_buffer(), where skb->data is then adjusted accordingly.
But the initial size calculation still considers the *full* skb length
(including the ETH header). So qeth_get_elements_no() can erroneously
reject a skb as too big, even though it would actually fit into an
output buffer once the ETH header has been trimmed off later.

Fix this by passing an additional offset to qeth_get_elements_no(),
that indicates where in the skb the on-wire data actually begins.
Since the current code uses data_offset=-1 for some special handling
on OSA, we need to clamp data_offset to 0...

On HiperSockets this helps when sending ~MTU-size skbs with weird page
alignment. No change for OSA or AF_IUCV.

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      | 3 ++-
 drivers/s390/net/qeth_core_main.c | 5 +++--
 drivers/s390/net/qeth_l2_main.c   | 5 +++--
 drivers/s390/net/qeth_l3_main.c   | 5 +++--
 4 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index e7addea8741b..d9561e39c3b2 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -961,7 +961,8 @@ int qeth_bridgeport_query_ports(struct qeth_card *card,
 int qeth_bridgeport_setrole(struct qeth_card *card, enum qeth_sbp_roles role);
 int qeth_bridgeport_an_set(struct qeth_card *card, int enable);
 int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int);
-int qeth_get_elements_no(struct qeth_card *, struct sk_buff *, int);
+int qeth_get_elements_no(struct qeth_card *card, struct sk_buff *skb,
+			 int extra_elems, int data_offset);
 int qeth_get_elements_for_frags(struct sk_buff *);
 int qeth_do_send_packet_fast(struct qeth_card *, struct qeth_qdio_out_q *,
 			struct sk_buff *, struct qeth_hdr *, int, int, int);
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 315d8a2db7c0..9a5f99ccb122 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -3837,6 +3837,7 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags);
  * @card:			qeth card structure, to check max. elems.
  * @skb:			SKB address
  * @extra_elems:		extra elems needed, to check against max.
+ * @data_offset:		range starts at skb->data + data_offset
  *
  * Returns the number of pages, and thus QDIO buffer elements, needed to cover
  * skb data, including linear part and fragments. Checks if the result plus
@@ -3844,10 +3845,10 @@ EXPORT_SYMBOL_GPL(qeth_get_elements_for_frags);
  * Note: extra_elems is not included in the returned result.
  */
 int qeth_get_elements_no(struct qeth_card *card,
-		     struct sk_buff *skb, int extra_elems)
+		     struct sk_buff *skb, int extra_elems, int data_offset)
 {
 	int elements = qeth_get_elements_for_range(
-				(addr_t)skb->data,
+				(addr_t)skb->data + data_offset,
 				(addr_t)skb->data + skb_headlen(skb)) +
 			qeth_get_elements_for_frags(skb);
 
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index bea483307618..af4e6a639fec 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -849,7 +849,7 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * chaining we can not send long frag lists
 	 */
 	if ((card->info.type != QETH_CARD_TYPE_IQD) &&
-	    !qeth_get_elements_no(card, new_skb, 0)) {
+	    !qeth_get_elements_no(card, new_skb, 0, 0)) {
 		int lin_rc = skb_linearize(new_skb);
 
 		if (card->options.performance_stats) {
@@ -894,7 +894,8 @@ static int qeth_l2_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 	}
 
-	elements = qeth_get_elements_no(card, new_skb, elements_needed);
+	elements = qeth_get_elements_no(card, new_skb, elements_needed,
+					(data_offset > 0) ? data_offset : 0);
 	if (!elements) {
 		if (data_offset >= 0)
 			kmem_cache_free(qeth_core_header_cache, hdr);
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 06d0addcc058..72aa9537a725 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2867,7 +2867,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 */
 	if ((card->info.type != QETH_CARD_TYPE_IQD) &&
 	    ((use_tso && !qeth_l3_get_elements_no_tso(card, new_skb, 1)) ||
-	     (!use_tso && !qeth_get_elements_no(card, new_skb, 0)))) {
+	     (!use_tso && !qeth_get_elements_no(card, new_skb, 0, 0)))) {
 		int lin_rc = skb_linearize(new_skb);
 
 		if (card->options.performance_stats) {
@@ -2909,7 +2909,8 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	elements = use_tso ?
 		   qeth_l3_get_elements_no_tso(card, new_skb, hdr_elements) :
-		   qeth_get_elements_no(card, new_skb, hdr_elements);
+		   qeth_get_elements_no(card, new_skb, hdr_elements,
+					(data_offset > 0) ? data_offset : 0);
 	if (!elements) {
 		if (data_offset >= 0)
 			kmem_cache_free(qeth_core_header_cache, hdr);

From acd9776b5c45ef02d1a210969a6fcc058afb76e3 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Date: Thu, 23 Mar 2017 14:55:09 +0100
Subject: [PATCH 052/410] s390/qeth: no ETH header for outbound AF_IUCV

With AF_IUCV traffic, the skb passed to hard_start_xmit() has a 14 byte
slot at skb->data, intended for an ETH header. qeth_l3_fill_af_iucv_hdr()
fills this ETH header... and then immediately moves it to the
skb's headroom, where it disappears and is never seen again.

But it's still possible for us to return NETDEV_TX_BUSY after the skb has
been modified. Since we didn't get a private copy of the skb, the next
time the skb is delivered to hard_start_xmit() it no longer has the
expected layout (we moved the ETH header to the headroom, so skb->data
now starts at the IUCV_TRANS header). So when qeth_l3_fill_af_iucv_hdr()
does another round of rebuilding, the resulting qeth header ends up
all wrong. On transmission, the buffer is then rejected by
the HiperSockets device with SBALF15 = x'04'.
When this error is passed back to af_iucv as TX_NOTIFY_UNREACHABLE, it
tears down the offending socket.

As the ETH header for AF_IUCV serves no purpose, just align the code to
what we do for IP traffic on L3 HiperSockets: keep the ETH header at
skb->data, and pass down data_offset = ETH_HLEN to qeth_fill_buffer().
When mapping the payload into the SBAL elements, the ETH header is then
stripped off. This avoids the skb manipulations in
qeth_l3_fill_af_iucv_hdr(), and any buffer re-entering hard_start_xmit()
after NETDEV_TX_BUSY is now processed properly.

Signed-off-by: Julian Wiedmann <jwi@linux.vnet.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l3_main.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 72aa9537a725..653f0fb76573 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2609,17 +2609,13 @@ static void qeth_l3_fill_af_iucv_hdr(struct qeth_card *card,
 	char daddr[16];
 	struct af_iucv_trans_hdr *iucv_hdr;
 
-	skb_pull(skb, 14);
-	card->dev->header_ops->create(skb, card->dev, 0,
-				      card->dev->dev_addr, card->dev->dev_addr,
-				      card->dev->addr_len);
-	skb_pull(skb, 14);
-	iucv_hdr = (struct af_iucv_trans_hdr *)skb->data;
 	memset(hdr, 0, sizeof(struct qeth_hdr));
 	hdr->hdr.l3.id = QETH_HEADER_TYPE_LAYER3;
 	hdr->hdr.l3.ext_flags = 0;
-	hdr->hdr.l3.length = skb->len;
+	hdr->hdr.l3.length = skb->len - ETH_HLEN;
 	hdr->hdr.l3.flags = QETH_HDR_IPV6 | QETH_CAST_UNICAST;
+
+	iucv_hdr = (struct af_iucv_trans_hdr *) (skb->data + ETH_HLEN);
 	memset(daddr, 0, sizeof(daddr));
 	daddr[0] = 0xfe;
 	daddr[1] = 0x80;
@@ -2823,10 +2819,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	if ((card->info.type == QETH_CARD_TYPE_IQD) &&
 	    !skb_is_nonlinear(skb)) {
 		new_skb = skb;
-		if (new_skb->protocol == ETH_P_AF_IUCV)
-			data_offset = 0;
-		else
-			data_offset = ETH_HLEN;
+		data_offset = ETH_HLEN;
 		hdr = kmem_cache_alloc(qeth_core_header_cache, GFP_ATOMIC);
 		if (!hdr)
 			goto tx_drop;

From 90b14dc731b1b4aac8ba01850b530bf7ba26462f Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.vnet.ibm.com>
Date: Thu, 23 Mar 2017 14:55:10 +0100
Subject: [PATCH 053/410] MAINTAINERS: add Julian Wiedmann

Add Julian Wiedmann as additional maintainer for drivers/s390/net
and net/iucv.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c45c02bc6082..e48678d15401 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10808,6 +10808,7 @@ F:	drivers/s390/block/dasd*
 F:	block/partitions/ibm.c
 
 S390 NETWORK DRIVERS
+M:	Julian Wiedmann <jwi@linux.vnet.ibm.com>
 M:	Ursula Braun <ubraun@linux.vnet.ibm.com>
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
@@ -10838,6 +10839,7 @@ S:	Supported
 F:	drivers/s390/scsi/zfcp_*
 
 S390 IUCV NETWORK LAYER
+M:	Julian Wiedmann <jwi@linux.vnet.ibm.com>
 M:	Ursula Braun <ubraun@linux.vnet.ibm.com>
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/

From a5af83925363eb85d467933e3d6ec5a87001eb7c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 23 Mar 2017 17:07:26 +0100
Subject: [PATCH 054/410] bna: avoid writing uninitialized data into hw
 registers

The latest gcc-7 snapshot warns about bfa_ioc_send_enable/bfa_ioc_send_disable
writing undefined values into the hardware registers:

drivers/net/ethernet/brocade/bna/bfa_ioc.c: In function 'bfa_iocpf_sm_disabling_entry':
arch/arm/include/asm/io.h:109:22: error: '*((void *)&disable_req+4)' is used uninitialized in this function [-Werror=uninitialized]
arch/arm/include/asm/io.h:109:22: error: '*((void *)&disable_req+8)' is used uninitialized in this function [-Werror=uninitialized]

The two functions look like they should do the same thing, but only one
of them initializes the time stamp and clscode field. The fact that we
only get a warning for one of the two functions seems to be arbitrary,
based on the inlining decisions in the compiler.

To address this, I'm making both functions do the same thing:

- set the clscode from the ioc structure in both
- set the time stamp from ktime_get_real_seconds (which also
  avoids the signed-integer overflow in 2038 and extends the
  well-defined behavior until 2106).
- zero-fill the reserved field

Fixes: 8b230ed8ec96 ("bna: Brocade 10Gb Ethernet device driver")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/brocade/bna/bfa_ioc.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c
index 9e59663a6ead..0f6811860ad5 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c
+++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c
@@ -1930,13 +1930,13 @@ static void
 bfa_ioc_send_enable(struct bfa_ioc *ioc)
 {
 	struct bfi_ioc_ctrl_req enable_req;
-	struct timeval tv;
 
 	bfi_h2i_set(enable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_ENABLE_REQ,
 		    bfa_ioc_portid(ioc));
 	enable_req.clscode = htons(ioc->clscode);
-	do_gettimeofday(&tv);
-	enable_req.tv_sec = ntohl(tv.tv_sec);
+	enable_req.rsvd = htons(0);
+	/* overflow in 2106 */
+	enable_req.tv_sec = ntohl(ktime_get_real_seconds());
 	bfa_ioc_mbox_send(ioc, &enable_req, sizeof(struct bfi_ioc_ctrl_req));
 }
 
@@ -1947,6 +1947,10 @@ bfa_ioc_send_disable(struct bfa_ioc *ioc)
 
 	bfi_h2i_set(disable_req.mh, BFI_MC_IOC, BFI_IOC_H2I_DISABLE_REQ,
 		    bfa_ioc_portid(ioc));
+	disable_req.clscode = htons(ioc->clscode);
+	disable_req.rsvd = htons(0);
+	/* overflow in 2106 */
+	disable_req.tv_sec = ntohl(ktime_get_real_seconds());
 	bfa_ioc_mbox_send(ioc, &disable_req, sizeof(struct bfi_ioc_ctrl_req));
 }
 

From a80db69e47d764bbcaf2fec54b1f308925e7c490 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Thu, 23 Mar 2017 11:03:31 -0700
Subject: [PATCH 055/410] kcm: return immediately after copy_from_user()
 failure

There is no reason to continue after a copy_from_user()
failure.

Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module")
Cc: Tom Herbert <tom@herbertland.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/kcm/kcmsock.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 309062f3debe..31762f76cdb5 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1687,7 +1687,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		struct kcm_attach info;
 
 		if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
-			err = -EFAULT;
+			return -EFAULT;
 
 		err = kcm_attach_ioctl(sock, &info);
 
@@ -1697,7 +1697,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		struct kcm_unattach info;
 
 		if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
-			err = -EFAULT;
+			return -EFAULT;
 
 		err = kcm_unattach_ioctl(sock, &info);
 
@@ -1708,7 +1708,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		struct socket *newsock = NULL;
 
 		if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
-			err = -EFAULT;
+			return -EFAULT;
 
 		err = kcm_clone(sock, &info, &newsock);
 

From 9f47a48e6eb97d793db85373e3ef4c55d876d334 Mon Sep 17 00:00:00 2001
From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Thu, 23 Mar 2017 20:47:15 -0700
Subject: [PATCH 056/410] Revert "e1000e: driver trying to free already-free
 irq"

This reverts commit 7e54d9d063fa239c95c21548c5267f0ef419ff56.

After additional regression testing, several users are experiencing
kernel panics during shutdown on e1000e devices.  Reverting this
change resolves the issue.

Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 2175cced402f..e9af89ad039c 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6274,8 +6274,8 @@ static int e1000e_pm_freeze(struct device *dev)
 		/* Quiesce the device without resetting the hardware */
 		e1000e_down(adapter, false);
 		e1000_free_irq(adapter);
-		e1000e_reset_interrupt_capability(adapter);
 	}
+	e1000e_reset_interrupt_capability(adapter);
 
 	/* Allow time for pending master requests to run */
 	e1000e_disable_pcie_master(&adapter->hw);

From 95f255211396958c718aef8c45e3923b5211ea7b Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 24 Mar 2017 09:38:03 -0700
Subject: [PATCH 057/410] net: Do not allow negative values for busy_read and
 busy_poll sysctl interfaces

This change basically codifies what I think was already the limitations on
the busy_poll and busy_read sysctl interfaces.  We weren't checking the
lower bounds and as such could input negative values. The behavior when
that was used was dependent on the architecture. In order to prevent any
issues with that I am just disabling support for values less than 0 since
this way we don't have to worry about any odd behaviors.

By limiting the sysctl values this way it also makes it consistent with how
we handle the SO_BUSY_POLL socket option since the value appears to be
reported as a signed integer value and negative values are rejected.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sysctl_net_core.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 4ead336e14ea..7f9cc400eca0 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -408,14 +408,16 @@ static struct ctl_table net_core_table[] = {
 		.data		= &sysctl_net_busy_poll,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
 	},
 	{
 		.procname	= "busy_read",
 		.data		= &sysctl_net_busy_read,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
 	},
 #endif
 #ifdef CONFIG_NET_SCHED

From 28ee1b746f493b7c62347d714f58fbf4f70df4f0 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 24 Mar 2017 19:42:37 +0100
Subject: [PATCH 058/410] secure_seq: downgrade to per-host timestamp offsets

Unfortunately too many devices (not under our control) use tcp_tw_recycle=1,
which depends on timestamps being identical of the same saddr.

Although tcp_tw_recycle got removed in net-next we can't make
such end hosts disappear so downgrade to per-host timestamp offsets.

Cc: Soheil Hassas Yeganeh <soheil@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Reported-by: Yvan Vanrossomme <yvan@vanrossomme.net>
Fixes: 95a22caee396c ("tcp: randomize tcp timestamp offsets for each connection")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/secure_seq.c | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 758f140b6bed..d28da7d363f1 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -20,9 +20,11 @@
 #include <net/tcp.h>
 
 static siphash_key_t net_secret __read_mostly;
+static siphash_key_t ts_secret __read_mostly;
 
 static __always_inline void net_secret_init(void)
 {
+	net_get_random_once(&ts_secret, sizeof(ts_secret));
 	net_get_random_once(&net_secret, sizeof(net_secret));
 }
 #endif
@@ -45,6 +47,23 @@ static u32 seq_scale(u32 seq)
 #endif
 
 #if IS_ENABLED(CONFIG_IPV6)
+static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
+{
+	const struct {
+		struct in6_addr saddr;
+		struct in6_addr daddr;
+	} __aligned(SIPHASH_ALIGNMENT) combined = {
+		.saddr = *(struct in6_addr *)saddr,
+		.daddr = *(struct in6_addr *)daddr,
+	};
+
+	if (sysctl_tcp_timestamps != 1)
+		return 0;
+
+	return siphash(&combined, offsetofend(typeof(combined), daddr),
+		       &ts_secret);
+}
+
 u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
 				 __be16 sport, __be16 dport, u32 *tsoff)
 {
@@ -63,7 +82,7 @@ u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
 	net_secret_init();
 	hash = siphash(&combined, offsetofend(typeof(combined), dport),
 		       &net_secret);
-	*tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0;
+	*tsoff = secure_tcpv6_ts_off(saddr, daddr);
 	return seq_scale(hash);
 }
 EXPORT_SYMBOL(secure_tcpv6_sequence_number);
@@ -88,6 +107,14 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
 #endif
 
 #ifdef CONFIG_INET
+static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr)
+{
+	if (sysctl_tcp_timestamps != 1)
+		return 0;
+
+	return siphash_2u32((__force u32)saddr, (__force u32)daddr,
+			    &ts_secret);
+}
 
 /* secure_tcp_sequence_number(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
  * but fortunately, `sport' cannot be 0 in any circumstances. If this changes,
@@ -103,7 +130,7 @@ u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 	hash = siphash_3u32((__force u32)saddr, (__force u32)daddr,
 			    (__force u32)sport << 16 | (__force u32)dport,
 			    &net_secret);
-	*tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0;
+	*tsoff = secure_tcp_ts_off(saddr, daddr);
 	return seq_scale(hash);
 }
 

From 13a8cd191a2b470cfd435b3b57dbd21aa65ff78c Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 24 Mar 2017 15:01:42 -0700
Subject: [PATCH 059/410] i40e: Do not enable NAPI on q_vectors that have no
 rings

When testing the epoll w/ busy poll code I found that I could get into a
state where the i40e driver had q_vectors w/ active NAPI that had no rings.
This was resulting in a divide by zero error.  To correct it I am updating
the driver code so that we only support NAPI on q_vectors that have 1 or
more rings allocated to them.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index e8a8351c8ea9..82a95cc2c8ee 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4438,8 +4438,12 @@ static void i40e_napi_enable_all(struct i40e_vsi *vsi)
 	if (!vsi->netdev)
 		return;
 
-	for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
-		napi_enable(&vsi->q_vectors[q_idx]->napi);
+	for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
+		struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+		if (q_vector->rx.ring || q_vector->tx.ring)
+			napi_enable(&q_vector->napi);
+	}
 }
 
 /**
@@ -4453,8 +4457,12 @@ static void i40e_napi_disable_all(struct i40e_vsi *vsi)
 	if (!vsi->netdev)
 		return;
 
-	for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++)
-		napi_disable(&vsi->q_vectors[q_idx]->napi);
+	for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
+		struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+		if (q_vector->rx.ring || q_vector->tx.ring)
+			napi_disable(&q_vector->napi);
+	}
 }
 
 /**

From 43a6684519ab0a6c52024b5e25322476cabad893 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 24 Mar 2017 19:36:13 -0700
Subject: [PATCH 060/410] ping: implement proper locking

We got a report of yet another bug in ping

http://www.openwall.com/lists/oss-security/2017/03/24/6

->disconnect() is not called with socket lock held.

Fix this by acquiring ping rwlock earlier.

Thanks to Daniel, Alexander and Andrey for letting us know this problem.

Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Daniel Jiang <danieljiang0415@gmail.com>
Reported-by: Solar Designer <solar@openwall.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ping.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 2af6244b83e2..ccfbce13a633 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -156,17 +156,18 @@ int ping_hash(struct sock *sk)
 void ping_unhash(struct sock *sk)
 {
 	struct inet_sock *isk = inet_sk(sk);
+
 	pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
+	write_lock_bh(&ping_table.lock);
 	if (sk_hashed(sk)) {
-		write_lock_bh(&ping_table.lock);
 		hlist_nulls_del(&sk->sk_nulls_node);
 		sk_nulls_node_init(&sk->sk_nulls_node);
 		sock_put(sk);
 		isk->inet_num = 0;
 		isk->inet_sport = 0;
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-		write_unlock_bh(&ping_table.lock);
 	}
+	write_unlock_bh(&ping_table.lock);
 }
 EXPORT_SYMBOL_GPL(ping_unhash);
 

From b1977682a3858b5584ffea7cfb7bd863f68db18d Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Fri, 24 Mar 2017 15:57:33 -0700
Subject: [PATCH 061/410] bpf: improve verifier packet range checks

llvm can optimize the 'if (ptr > data_end)' checks to be in the order
slightly different than the original C code which will confuse verifier.
Like:
if (ptr + 16 > data_end)
  return TC_ACT_SHOT;
// may be followed by
if (ptr + 14 > data_end)
  return TC_ACT_SHOT;
while llvm can see that 'ptr' is valid for all 16 bytes,
the verifier could not.
Fix verifier logic to account for such case and add a test.

Reported-by: Huapeng Zhou <hzhou@fb.com>
Fixes: 969bf05eb3ce ("bpf: direct packet access")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c                       |  5 +++--
 tools/testing/selftests/bpf/test_verifier.c | 20 ++++++++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 796b68d00119..5e6202e62265 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1973,14 +1973,15 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
 
 	for (i = 0; i < MAX_BPF_REG; i++)
 		if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
-			regs[i].range = dst_reg->off;
+			/* keep the maximum range already checked */
+			regs[i].range = max(regs[i].range, dst_reg->off);
 
 	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
 		if (state->stack_slot_type[i] != STACK_SPILL)
 			continue;
 		reg = &state->spilled_regs[i / BPF_REG_SIZE];
 		if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
-			reg->range = dst_reg->off;
+			reg->range = max(reg->range, dst_reg->off);
 	}
 }
 
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index d1555e4240c0..7d761d4cc759 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -3417,6 +3417,26 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_LWT_XMIT,
 	},
+	{
+		"overlapping checks for direct packet access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_LWT_XMIT,
+	},
 	{
 		"invalid access of tc_classid for LWT_IN",
 		.insns = {

From 5659495a7a1455665ce1466d156597ad1bda8772 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naota@elisp.net>
Date: Fri, 24 Mar 2017 23:04:44 -0700
Subject: [PATCH 062/410] uapi: add missing install of userio.h

While commit 5523662edd4f ("Input: add userio module") added userio.h
under the uapi/ directory, it forgot to add the header file to Kbuild.
Thus, the file was missing from header installation.

Signed-off-by: Naohiro Aota <naota@elisp.net>
Reviewed-by: Lyude Paul <thatslyude@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/uapi/linux/Kbuild | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index f330ba4547cf..b4a9a1891db6 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -437,6 +437,7 @@ header-y += unistd.h
 header-y += unix_diag.h
 header-y += usbdevice_fs.h
 header-y += usbip.h
+header-y += userio.h
 header-y += utime.h
 header-y += utsname.h
 header-y += uuid.h

From a096926ed4532eac38d4ec92aaba8c7f2149d89a Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Fri, 24 Mar 2017 18:44:02 +0100
Subject: [PATCH 063/410] iio: cros_ec_sensors: Fix return value to get raw and
 calibbias data.

The cros_ec_sensors_read function must return the type of value on all
cases. This was always true except for RAW and CALIBBIAS data which
returned an error or 0. This patch just fixes the mistake I introduced
when submitting the series.

Fixes: commit c14dca07a31d (iio: cros_ec_sensors: add ChromeOS EC
Contiguous Sensors driver)

Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
index d6c372bb433b..c17596f7ed2c 100644
--- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
+++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
@@ -61,7 +61,7 @@ static int cros_ec_sensors_read(struct iio_dev *indio_dev,
 		ret = st->core.read_ec_sensors_data(indio_dev, 1 << idx, &data);
 		if (ret < 0)
 			break;
-
+		ret = IIO_VAL_INT;
 		*val = data;
 		break;
 	case IIO_CHAN_INFO_CALIBBIAS:
@@ -76,7 +76,7 @@ static int cros_ec_sensors_read(struct iio_dev *indio_dev,
 		for (i = CROS_EC_SENSOR_X; i < CROS_EC_SENSOR_MAX_AXIS; i++)
 			st->core.calib[i] =
 				st->core.resp->sensor_offset.offset[i];
-
+		ret = IIO_VAL_INT;
 		*val = st->core.calib[idx];
 		break;
 	case IIO_CHAN_INFO_SCALE:

From 4bdc9029685ac03be50b320b29691766d2326c2b Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz@free-electrons.com>
Date: Tue, 21 Mar 2017 16:52:14 +0100
Subject: [PATCH 064/410] iio: bmg160: reset chip when probing

The gyroscope chip might need to be reset to be used.

Without the chip being reset, the driver stopped at the first
regmap_read (to get the CHIP_ID) and failed to probe.

The datasheet of the gyroscope says that a minimum wait of 30ms after
the reset has to be done.

This patch has been checked on a BMX055 and the datasheet of the BMG160
and the BMI055 give the same reset register and bits.

Signed-off-by: Quentin Schulz <quentin.schulz@free-electrons.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/gyro/bmg160_core.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c
index f7fcfa886f72..821919dd245b 100644
--- a/drivers/iio/gyro/bmg160_core.c
+++ b/drivers/iio/gyro/bmg160_core.c
@@ -27,6 +27,7 @@
 #include <linux/iio/trigger_consumer.h>
 #include <linux/iio/triggered_buffer.h>
 #include <linux/regmap.h>
+#include <linux/delay.h>
 #include "bmg160.h"
 
 #define BMG160_IRQ_NAME		"bmg160_event"
@@ -52,6 +53,9 @@
 #define BMG160_DEF_BW			100
 #define BMG160_REG_PMU_BW_RES		BIT(7)
 
+#define BMG160_GYRO_REG_RESET		0x14
+#define BMG160_GYRO_RESET_VAL		0xb6
+
 #define BMG160_REG_INT_MAP_0		0x17
 #define BMG160_INT_MAP_0_BIT_ANY	BIT(1)
 
@@ -236,6 +240,14 @@ static int bmg160_chip_init(struct bmg160_data *data)
 	int ret;
 	unsigned int val;
 
+	/*
+	 * Reset chip to get it in a known good state. A delay of 30ms after
+	 * reset is required according to the datasheet.
+	 */
+	regmap_write(data->regmap, BMG160_GYRO_REG_RESET,
+		     BMG160_GYRO_RESET_VAL);
+	usleep_range(30000, 30700);
+
 	ret = regmap_read(data->regmap, BMG160_REG_CHIP_ID, &val);
 	if (ret < 0) {
 		dev_err(dev, "Error reading reg_chip_id\n");

From a17f1861b5ea4327f9f35e9edb3c5fadceaa7c64 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 24 Mar 2017 23:02:49 +0100
Subject: [PATCH 065/410] net: hns: fix uninitialized data use

When dev_dbg() is enabled, we print uninitialized data, as gcc-7.0.1
now points out:

ethernet/hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_set_promisc_tcam':
ethernet/hisilicon/hns/hns_dsaf_main.c:2947:75: error: 'tbl_tcam_data.low.val' may be used uninitialized in this function [-Werror=maybe-uninitialized]
ethernet/hisilicon/hns/hns_dsaf_main.c:2947:75: error: 'tbl_tcam_data.high.val' may be used uninitialized in this function [-Werror=maybe-uninitialized]

We also pass the data into hns_dsaf_tcam_mc_cfg(), which might later
use it (not sure about that), so it seems safer to just always initialize
the tbl_tcam_data structure.

Fixes: 1f5fa2dd1cfa ("net: hns: fix for promisc mode in HNS driver")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index 90dbda792614..cd93657abe87 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -2924,10 +2924,11 @@ void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev,
 	/* find the tcam entry index for promisc */
 	entry_index = dsaf_promisc_tcam_entry(port);
 
+	memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data));
+	memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask));
+
 	/* config key mask */
 	if (enable) {
-		memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data));
-		memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask));
 		dsaf_set_field(tbl_tcam_data.low.bits.port_vlan,
 			       DSAF_TBL_TCAM_KEY_PORT_M,
 			       DSAF_TBL_TCAM_KEY_PORT_S, port);

From 834a61d455ff8069552f44140b2c1de85e6bc84d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 24 Mar 2017 23:02:50 +0100
Subject: [PATCH 066/410] net: hns: avoid gcc-7.0.1 warning for uninitialized
 data

hns_dsaf_set_mac_key() calls dsaf_set_field() on an uninitialized field,
which will then change only a few of its bits, causing a warning with
the latest gcc:

hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_set_mac_uc_entry':
hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized]
   (origin) &= (~(mask)); \
            ^~
hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_set_mac_mc_entry':
hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized]
hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_add_mac_mc_port':
hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized]
hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_del_mac_entry':
hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized]
hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_rm_mac_addr':
hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized]
hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_del_mac_mc_port':
hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized]
hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_get_mac_uc_entry':
hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized]
hisilicon/hns/hns_dsaf_main.c: In function 'hns_dsaf_get_mac_mc_entry':
hisilicon/hns/hns_dsaf_reg.h:1046:12: error: 'mac_key.low.bits.port_vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized]

The code is actually correct since we always set all 16 bits of the
port_vlan field, but gcc correctly points out that the first
access does contain uninitialized data.

This initializes the field to zero first before setting the
individual bits.

Fixes: 5483bfcb169c ("net: hns: modify tcam table and set mac key")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index cd93657abe87..403ea9db6dbd 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -1519,6 +1519,7 @@ static void hns_dsaf_set_mac_key(
 	mac_key->high.bits.mac_3 = addr[3];
 	mac_key->low.bits.mac_4 = addr[4];
 	mac_key->low.bits.mac_5 = addr[5];
+	mac_key->low.bits.port_vlan = 0;
 	dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_VLAN_M,
 		       DSAF_TBL_TCAM_KEY_VLAN_S, vlan_id);
 	dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_PORT_M,

From 6ac3b77a6ffff7513ff86b684aa256ea01c0e5b5 Mon Sep 17 00:00:00 2001
From: Alexey Khoroshilov <khoroshilov@ispras.ru>
Date: Sat, 25 Mar 2017 01:48:08 +0300
Subject: [PATCH 067/410] irda: vlsi_ir: fix check for DMA mapping errors

vlsi_alloc_ring() checks for DMA mapping errors by comparing
returned address with zero, while pci_dma_mapping_error() should be used.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/irda/vlsi_ir.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c
index ffedad2a360a..15b920086251 100644
--- a/drivers/net/irda/vlsi_ir.c
+++ b/drivers/net/irda/vlsi_ir.c
@@ -418,8 +418,9 @@ static struct vlsi_ring *vlsi_alloc_ring(struct pci_dev *pdev, struct ring_descr
 		memset(rd, 0, sizeof(*rd));
 		rd->hw = hwmap + i;
 		rd->buf = kmalloc(len, GFP_KERNEL|GFP_DMA);
-		if (rd->buf == NULL ||
-		    !(busaddr = pci_map_single(pdev, rd->buf, len, dir))) {
+		if (rd->buf)
+			busaddr = pci_map_single(pdev, rd->buf, len, dir);
+		if (rd->buf == NULL || pci_dma_mapping_error(pdev, busaddr)) {
 			if (rd->buf) {
 				net_err_ratelimited("%s: failed to create PCI-MAP for %p\n",
 						    __func__, rd->buf);
@@ -430,8 +431,7 @@ static struct vlsi_ring *vlsi_alloc_ring(struct pci_dev *pdev, struct ring_descr
 				rd = r->rd + j;
 				busaddr = rd_get_addr(rd);
 				rd_set_addr_status(rd, 0, 0);
-				if (busaddr)
-					pci_unmap_single(pdev, busaddr, len, dir);
+				pci_unmap_single(pdev, busaddr, len, dir);
 				kfree(rd->buf);
 				rd->buf = NULL;
 			}

From 2db2c250dd3d1e74a50d4ab5f44c44ca5cb4e42b Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 27 Mar 2017 15:11:44 +1100
Subject: [PATCH 068/410] selftests/powerpc: Fix standalone powerpc build

The changes to enable building with a separate output directory, in
commit a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT") broke
building the powerpc selftests on their own, eg:

 $ cd tools/testing/selftests/powerpc; make

It was partially fixed in commit e53aff45c490 ("selftests: lib.mk Fix
individual test builds"), which defined OUTPUT for standalone tests. But
that only defines OUTPUT within the Makefile, the value is not exported
so sub-shells can't see it. We could export OUTPUT, but it's actually
cleaner to just expand the value of OUTPUT before we invoke the shell.

Fixes: a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/Makefile | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 1c5d0575802e..bf13fc2297aa 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -34,34 +34,34 @@ endif
 all: $(SUB_DIRS)
 
 $(SUB_DIRS):
-	BUILD_TARGET=$$OUTPUT/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all
+	BUILD_TARGET=$(OUTPUT)/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all
 
 include ../lib.mk
 
 override define RUN_TESTS
 	@for TARGET in $(SUB_DIRS); do \
-		BUILD_TARGET=$$OUTPUT/$$TARGET;	\
+		BUILD_TARGET=$(OUTPUT)/$$TARGET;	\
 		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
 	done;
 endef
 
 override define INSTALL_RULE
 	@for TARGET in $(SUB_DIRS); do \
-		BUILD_TARGET=$$OUTPUT/$$TARGET;	\
+		BUILD_TARGET=$(OUTPUT)/$$TARGET;	\
 		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install;\
 	done;
 endef
 
 override define EMIT_TESTS
 	@for TARGET in $(SUB_DIRS); do \
-		BUILD_TARGET=$$OUTPUT/$$TARGET;	\
+		BUILD_TARGET=$(OUTPUT)/$$TARGET;	\
 		$(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests;\
 	done;
 endef
 
 clean:
 	@for TARGET in $(SUB_DIRS); do \
-		BUILD_TARGET=$$OUTPUT/$$TARGET;	\
+		BUILD_TARGET=$(OUTPUT)/$$TARGET;	\
 		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean; \
 	done;
 	rm -f tags

From 3b7dabf029478bb80507a6c4500ca94132a2bc0b Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sat, 25 Mar 2017 08:53:12 +0800
Subject: [PATCH 069/410] netfilter: invoke synchronize_rcu after set the
 _hook_ to NULL

Otherwise, another CPU may access the invalid pointer. For example:
    CPU0                CPU1
     -              rcu_read_lock();
     -              pfunc = _hook_;
  _hook_ = NULL;          -
  mod unload              -
     -                 pfunc(); // invalid, panic
     -             rcu_read_unlock();

So we must call synchronize_rcu() to wait the rcu reader to finish.

Also note, in nf_nat_snmp_basic_fini, synchronize_rcu() will be invoked
by later nf_conntrack_helper_unregister, but I'm inclined to add a
explicit synchronize_rcu after set the nf_nat_snmp_hook to NULL. Depend
on such obscure assumptions is not a good idea.

Last, in nfnetlink_cttimeout, we use kfree_rcu to free the time object,
so in cttimeout_exit, invoking rcu_barrier() is not necessary at all,
remove it too.

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_nat_snmp_basic.c | 1 +
 net/netfilter/nf_conntrack_ecache.c    | 2 ++
 net/netfilter/nf_conntrack_netlink.c   | 1 +
 net/netfilter/nf_nat_core.c            | 2 ++
 net/netfilter/nfnetlink_cttimeout.c    | 2 +-
 5 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index c9b52c361da2..5a8f7c360887 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1304,6 +1304,7 @@ static int __init nf_nat_snmp_basic_init(void)
 static void __exit nf_nat_snmp_basic_fini(void)
 {
 	RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
+	synchronize_rcu();
 	nf_conntrack_helper_unregister(&snmp_trap_helper);
 }
 
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index da9df2d56e66..22fc32143e9c 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -290,6 +290,7 @@ void nf_conntrack_unregister_notifier(struct net *net,
 	BUG_ON(notify != new);
 	RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
 	mutex_unlock(&nf_ct_ecache_mutex);
+	/* synchronize_rcu() is called from ctnetlink_exit. */
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
 
@@ -326,6 +327,7 @@ void nf_ct_expect_unregister_notifier(struct net *net,
 	BUG_ON(notify != new);
 	RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);
 	mutex_unlock(&nf_ct_ecache_mutex);
+	/* synchronize_rcu() is called from ctnetlink_exit. */
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 6806b5e73567..908d858034e4 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -3442,6 +3442,7 @@ static void __exit ctnetlink_exit(void)
 #ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
 	RCU_INIT_POINTER(nfnl_ct_hook, NULL);
 #endif
+	synchronize_rcu();
 }
 
 module_init(ctnetlink_init);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 94b14c5a8b17..82802e4a6640 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -903,6 +903,8 @@ static void __exit nf_nat_cleanup(void)
 #ifdef CONFIG_XFRM
 	RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL);
 #endif
+	synchronize_rcu();
+
 	for (i = 0; i < NFPROTO_NUMPROTO; i++)
 		kfree(nf_nat_l4protos[i]);
 
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 139e0867e56e..47d6656c9119 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -646,8 +646,8 @@ static void __exit cttimeout_exit(void)
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
 	RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
+	synchronize_rcu();
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	rcu_barrier();
 }
 
 module_init(cttimeout_init);

From 83d90219a5df8d950855ce73229a97b63605c317 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sat, 25 Mar 2017 12:09:15 +0800
Subject: [PATCH 070/410] netfilter: nfnl_cthelper: fix a race when walk the
 nf_ct_helper_hash table

The nf_ct_helper_hash table is protected by nf_ct_helper_mutex, while
nfct_helper operation is protected by nfnl_lock(NFNL_SUBSYS_CTHELPER).
So it's possible that one CPU is walking the nf_ct_helper_hash for
cthelper add/get/del, another cpu is doing nf_conntrack_helpers_unregister
at the same time. This is dangrous, and may cause use after free error.

Note, delete operation will flush all cthelpers added via nfnetlink, so
using rcu to do protect is not easy.

Now introduce a dummy list to record all the cthelpers added via
nfnetlink, then we can walk the dummy list instead of walking the
nf_ct_helper_hash. Also, keep nfnl_cthelper_dump_table unchanged, it
may be invoked without nfnl_lock(NFNL_SUBSYS_CTHELPER) held.

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_cthelper.c | 181 +++++++++++++----------------
 1 file changed, 83 insertions(+), 98 deletions(-)

diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 2b987d2a77bc..d45558178da5 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -32,6 +32,13 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
 MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers");
 
+struct nfnl_cthelper {
+	struct list_head		list;
+	struct nf_conntrack_helper	helper;
+};
+
+static LIST_HEAD(nfnl_cthelper_list);
+
 static int
 nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff,
 			struct nf_conn *ct, enum ip_conntrack_info ctinfo)
@@ -205,14 +212,16 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
 		     struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_helper *helper;
+	struct nfnl_cthelper *nfcth;
 	int ret;
 
 	if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN])
 		return -EINVAL;
 
-	helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL);
-	if (helper == NULL)
+	nfcth = kzalloc(sizeof(*nfcth), GFP_KERNEL);
+	if (nfcth == NULL)
 		return -ENOMEM;
+	helper = &nfcth->helper;
 
 	ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]);
 	if (ret < 0)
@@ -249,11 +258,12 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
 	if (ret < 0)
 		goto err2;
 
+	list_add_tail(&nfcth->list, &nfnl_cthelper_list);
 	return 0;
 err2:
 	kfree(helper->expect_policy);
 err1:
-	kfree(helper);
+	kfree(nfcth);
 	return ret;
 }
 
@@ -379,7 +389,8 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
 	const char *helper_name;
 	struct nf_conntrack_helper *cur, *helper = NULL;
 	struct nf_conntrack_tuple tuple;
-	int ret = 0, i;
+	struct nfnl_cthelper *nlcth;
+	int ret = 0;
 
 	if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE])
 		return -EINVAL;
@@ -390,31 +401,22 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
 	if (ret < 0)
 		return ret;
 
-	rcu_read_lock();
-	for (i = 0; i < nf_ct_helper_hsize && !helper; i++) {
-		hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) {
+	list_for_each_entry(nlcth, &nfnl_cthelper_list, list) {
+		cur = &nlcth->helper;
 
-			/* skip non-userspace conntrack helpers. */
-			if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
-				continue;
+		if (strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+			continue;
 
-			if (strncmp(cur->name, helper_name,
-					NF_CT_HELPER_NAME_LEN) != 0)
-				continue;
+		if ((tuple.src.l3num != cur->tuple.src.l3num ||
+		     tuple.dst.protonum != cur->tuple.dst.protonum))
+			continue;
 
-			if ((tuple.src.l3num != cur->tuple.src.l3num ||
-			     tuple.dst.protonum != cur->tuple.dst.protonum))
-				continue;
+		if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
 
-			if (nlh->nlmsg_flags & NLM_F_EXCL) {
-				ret = -EEXIST;
-				goto err;
-			}
-			helper = cur;
-			break;
-		}
+		helper = cur;
+		break;
 	}
-	rcu_read_unlock();
 
 	if (helper == NULL)
 		ret = nfnl_cthelper_create(tb, &tuple);
@@ -422,9 +424,6 @@ static int nfnl_cthelper_new(struct net *net, struct sock *nfnl,
 		ret = nfnl_cthelper_update(tb, helper);
 
 	return ret;
-err:
-	rcu_read_unlock();
-	return ret;
 }
 
 static int
@@ -588,11 +587,12 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
 			     struct sk_buff *skb, const struct nlmsghdr *nlh,
 			     const struct nlattr * const tb[])
 {
-	int ret = -ENOENT, i;
+	int ret = -ENOENT;
 	struct nf_conntrack_helper *cur;
 	struct sk_buff *skb2;
 	char *helper_name = NULL;
 	struct nf_conntrack_tuple tuple;
+	struct nfnl_cthelper *nlcth;
 	bool tuple_set = false;
 
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
@@ -613,45 +613,39 @@ static int nfnl_cthelper_get(struct net *net, struct sock *nfnl,
 		tuple_set = true;
 	}
 
-	for (i = 0; i < nf_ct_helper_hsize; i++) {
-		hlist_for_each_entry_rcu(cur, &nf_ct_helper_hash[i], hnode) {
+	list_for_each_entry(nlcth, &nfnl_cthelper_list, list) {
+		cur = &nlcth->helper;
+		if (helper_name &&
+		    strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+			continue;
 
-			/* skip non-userspace conntrack helpers. */
-			if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
-				continue;
+		if (tuple_set &&
+		    (tuple.src.l3num != cur->tuple.src.l3num ||
+		     tuple.dst.protonum != cur->tuple.dst.protonum))
+			continue;
 
-			if (helper_name && strncmp(cur->name, helper_name,
-						NF_CT_HELPER_NAME_LEN) != 0) {
-				continue;
-			}
-			if (tuple_set &&
-			    (tuple.src.l3num != cur->tuple.src.l3num ||
-			     tuple.dst.protonum != cur->tuple.dst.protonum))
-				continue;
-
-			skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-			if (skb2 == NULL) {
-				ret = -ENOMEM;
-				break;
-			}
-
-			ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
-						nlh->nlmsg_seq,
-						NFNL_MSG_TYPE(nlh->nlmsg_type),
-						NFNL_MSG_CTHELPER_NEW, cur);
-			if (ret <= 0) {
-				kfree_skb(skb2);
-				break;
-			}
-
-			ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
-						MSG_DONTWAIT);
-			if (ret > 0)
-				ret = 0;
-
-			/* this avoids a loop in nfnetlink. */
-			return ret == -EAGAIN ? -ENOBUFS : ret;
+		skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+		if (skb2 == NULL) {
+			ret = -ENOMEM;
+			break;
 		}
+
+		ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid,
+					      nlh->nlmsg_seq,
+					      NFNL_MSG_TYPE(nlh->nlmsg_type),
+					      NFNL_MSG_CTHELPER_NEW, cur);
+		if (ret <= 0) {
+			kfree_skb(skb2);
+			break;
+		}
+
+		ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
+				      MSG_DONTWAIT);
+		if (ret > 0)
+			ret = 0;
+
+		/* this avoids a loop in nfnetlink. */
+		return ret == -EAGAIN ? -ENOBUFS : ret;
 	}
 	return ret;
 }
@@ -662,10 +656,10 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
 {
 	char *helper_name = NULL;
 	struct nf_conntrack_helper *cur;
-	struct hlist_node *tmp;
 	struct nf_conntrack_tuple tuple;
 	bool tuple_set = false, found = false;
-	int i, j = 0, ret;
+	struct nfnl_cthelper *nlcth, *n;
+	int j = 0, ret;
 
 	if (tb[NFCTH_NAME])
 		helper_name = nla_data(tb[NFCTH_NAME]);
@@ -678,30 +672,27 @@ static int nfnl_cthelper_del(struct net *net, struct sock *nfnl,
 		tuple_set = true;
 	}
 
-	for (i = 0; i < nf_ct_helper_hsize; i++) {
-		hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i],
-								hnode) {
-			/* skip non-userspace conntrack helpers. */
-			if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
-				continue;
+	list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
+		cur = &nlcth->helper;
+		j++;
 
-			j++;
+		if (helper_name &&
+		    strncmp(cur->name, helper_name, NF_CT_HELPER_NAME_LEN))
+			continue;
 
-			if (helper_name && strncmp(cur->name, helper_name,
-						NF_CT_HELPER_NAME_LEN) != 0) {
-				continue;
-			}
-			if (tuple_set &&
-			    (tuple.src.l3num != cur->tuple.src.l3num ||
-			     tuple.dst.protonum != cur->tuple.dst.protonum))
-				continue;
+		if (tuple_set &&
+		    (tuple.src.l3num != cur->tuple.src.l3num ||
+		     tuple.dst.protonum != cur->tuple.dst.protonum))
+			continue;
 
-			found = true;
-			nf_conntrack_helper_unregister(cur);
-			kfree(cur->expect_policy);
-			kfree(cur);
-		}
+		found = true;
+		nf_conntrack_helper_unregister(cur);
+		kfree(cur->expect_policy);
+
+		list_del(&nlcth->list);
+		kfree(nlcth);
 	}
+
 	/* Make sure we return success if we flush and there is no helpers */
 	return (found || j == 0) ? 0 : -ENOENT;
 }
@@ -750,22 +741,16 @@ err_out:
 static void __exit nfnl_cthelper_exit(void)
 {
 	struct nf_conntrack_helper *cur;
-	struct hlist_node *tmp;
-	int i;
+	struct nfnl_cthelper *nlcth, *n;
 
 	nfnetlink_subsys_unregister(&nfnl_cthelper_subsys);
 
-	for (i=0; i<nf_ct_helper_hsize; i++) {
-		hlist_for_each_entry_safe(cur, tmp, &nf_ct_helper_hash[i],
-									hnode) {
-			/* skip non-userspace conntrack helpers. */
-			if (!(cur->flags & NF_CT_HELPER_F_USERSPACE))
-				continue;
+	list_for_each_entry_safe(nlcth, n, &nfnl_cthelper_list, list) {
+		cur = &nlcth->helper;
 
-			nf_conntrack_helper_unregister(cur);
-			kfree(cur->expect_policy);
-			kfree(cur);
-		}
+		nf_conntrack_helper_unregister(cur);
+		kfree(cur->expect_policy);
+		kfree(nlcth);
 	}
 }
 

From 9c3f3794926a997b1cab6c42480ff300efa2d162 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sat, 25 Mar 2017 16:35:29 +0800
Subject: [PATCH 071/410] netfilter: nf_ct_ext: fix possible panic after
 nf_ct_extend_unregister

If one cpu is doing nf_ct_extend_unregister while another cpu is doing
__nf_ct_ext_add_length, then we may hit BUG_ON(t == NULL). Moreover,
there's no synchronize_rcu invocation after set nf_ct_ext_types[id] to
NULL, so it's possible that we may access invalid pointer.

But actually, most of the ct extends are built-in, so the problem listed
above will not happen. However, there are two exceptions: NF_CT_EXT_NAT
and NF_CT_EXT_SYNPROXY.

For _EXT_NAT, the panic will not happen, since adding the nat extend and
unregistering the nat extend are located in the same file(nf_nat_core.c),
this means that after the nat module is removed, we cannot add the nat
extend too.

For _EXT_SYNPROXY, synproxy extend may be added by init_conntrack, while
synproxy extend unregister will be done by synproxy_core_exit. So after
nf_synproxy_core.ko is removed, we may still try to add the synproxy
extend, then kernel panic may happen.

I know it's very hard to reproduce this issue, but I can play a tricky
game to make it happen very easily :)

Step 1. Enable SYNPROXY for tcp dport 1234 at FORWARD hook:
  # iptables -I FORWARD -p tcp --dport 1234 -j SYNPROXY
Step 2. Queue the syn packet to the userspace at raw table OUTPUT hook.
        Also note, in the userspace we only add a 20s' delay, then
        reinject the syn packet to the kernel:
  # iptables -t raw -I OUTPUT -p tcp --syn -j NFQUEUE --queue-num 1
Step 3. Using "nc 2.2.2.2 1234" to connect the server.
Step 4. Now remove the nf_synproxy_core.ko quickly:
  # iptables -F FORWARD
  # rmmod ipt_SYNPROXY
  # rmmod nf_synproxy_core
Step 5. After 20s' delay, the syn packet is reinjected to the kernel.

Now you will see the panic like this:
  kernel BUG at net/netfilter/nf_conntrack_extend.c:91!
  Call Trace:
   ? __nf_ct_ext_add_length+0x53/0x3c0 [nf_conntrack]
   init_conntrack+0x12b/0x600 [nf_conntrack]
   nf_conntrack_in+0x4cc/0x580 [nf_conntrack]
   ipv4_conntrack_local+0x48/0x50 [nf_conntrack_ipv4]
   nf_reinject+0x104/0x270
   nfqnl_recv_verdict+0x3e1/0x5f9 [nfnetlink_queue]
   ? nfqnl_recv_verdict+0x5/0x5f9 [nfnetlink_queue]
   ? nla_parse+0xa0/0x100
   nfnetlink_rcv_msg+0x175/0x6a9 [nfnetlink]
   [...]

One possible solution is to make NF_CT_EXT_SYNPROXY extend built-in, i.e.
introduce nf_conntrack_synproxy.c and only do ct extend register and
unregister in it, similar to nf_conntrack_timeout.c.

But having such a obscure restriction of nf_ct_extend_unregister is not a
good idea, so we should invoke synchronize_rcu after set nf_ct_ext_types
to NULL, and check the NULL pointer when do __nf_ct_ext_add_length. Then
it will be easier if we add new ct extend in the future.

Last, we use kfree_rcu to free nf_ct_ext, so rcu_barrier() is unnecessary
anymore, remove it too.

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_extend.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 02bcf00c2492..008299b7f78f 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -53,7 +53,11 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id,
 
 	rcu_read_lock();
 	t = rcu_dereference(nf_ct_ext_types[id]);
-	BUG_ON(t == NULL);
+	if (!t) {
+		rcu_read_unlock();
+		return NULL;
+	}
+
 	off = ALIGN(sizeof(struct nf_ct_ext), t->align);
 	len = off + t->len + var_alloc_len;
 	alloc_size = t->alloc_size + var_alloc_len;
@@ -88,7 +92,10 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
 
 	rcu_read_lock();
 	t = rcu_dereference(nf_ct_ext_types[id]);
-	BUG_ON(t == NULL);
+	if (!t) {
+		rcu_read_unlock();
+		return NULL;
+	}
 
 	newoff = ALIGN(old->len, t->align);
 	newlen = newoff + t->len + var_alloc_len;
@@ -175,6 +182,6 @@ void nf_ct_extend_unregister(struct nf_ct_ext_type *type)
 	RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL);
 	update_alloc_size(type);
 	mutex_unlock(&nf_ct_ext_type_mutex);
-	rcu_barrier(); /* Wait for completion of call_rcu()'s */
+	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(nf_ct_extend_unregister);

From 75c689dca98851d65ef5a27e5ce26b625b68751c Mon Sep 17 00:00:00 2001
From: Gao Feng <fgao@ikuai8.com>
Date: Sat, 25 Mar 2017 18:24:36 +0800
Subject: [PATCH 072/410] netfilter: nf_nat_snmp: Fix panic when
 snmp_trap_helper fails to register

In the commit 93557f53e1fb ("netfilter: nf_conntrack: nf_conntrack snmp
helper"), the snmp_helper is replaced by nf_nat_snmp_hook. So the
snmp_helper is never registered. But it still tries to unregister the
snmp_helper, it could cause the panic.

Now remove the useless snmp_helper and the unregister call in the
error handler.

Fixes: 93557f53e1fb ("netfilter: nf_conntrack: nf_conntrack snmp helper")
Signed-off-by: Gao Feng <fgao@ikuai8.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_nat_snmp_basic.c | 19 +------------------
 1 file changed, 1 insertion(+), 18 deletions(-)

diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 5a8f7c360887..53e49f5011d3 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1260,16 +1260,6 @@ static const struct nf_conntrack_expect_policy snmp_exp_policy = {
 	.timeout	= 180,
 };
 
-static struct nf_conntrack_helper snmp_helper __read_mostly = {
-	.me			= THIS_MODULE,
-	.help			= help,
-	.expect_policy		= &snmp_exp_policy,
-	.name			= "snmp",
-	.tuple.src.l3num	= AF_INET,
-	.tuple.src.u.udp.port	= cpu_to_be16(SNMP_PORT),
-	.tuple.dst.protonum	= IPPROTO_UDP,
-};
-
 static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
 	.me			= THIS_MODULE,
 	.help			= help,
@@ -1288,17 +1278,10 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
 
 static int __init nf_nat_snmp_basic_init(void)
 {
-	int ret = 0;
-
 	BUG_ON(nf_nat_snmp_hook != NULL);
 	RCU_INIT_POINTER(nf_nat_snmp_hook, help);
 
-	ret = nf_conntrack_helper_register(&snmp_trap_helper);
-	if (ret < 0) {
-		nf_conntrack_helper_unregister(&snmp_helper);
-		return ret;
-	}
-	return ret;
+	return nf_conntrack_helper_register(&snmp_trap_helper);
 }
 
 static void __exit nf_nat_snmp_basic_fini(void)

From 9bcf53f34a2c1cebc45cc12e273dcd5f51fbc099 Mon Sep 17 00:00:00 2001
From: "Reizer, Eyal" <eyalr@ti.com>
Date: Sun, 26 Mar 2017 08:53:10 +0000
Subject: [PATCH 073/410] ARM: dts: am335x-evmsk: adjust mmc2 param to allow
 suspend

mmc2 used for wl12xx was missing the keep-power-in suspend
parameter. As a result the board couldn't reach suspend state.

Signed-off-by: Eyal Reizer <eyalr@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/am335x-evmsk.dts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/boot/dts/am335x-evmsk.dts b/arch/arm/boot/dts/am335x-evmsk.dts
index 9e43c443738a..9ba4b18c0cb2 100644
--- a/arch/arm/boot/dts/am335x-evmsk.dts
+++ b/arch/arm/boot/dts/am335x-evmsk.dts
@@ -672,6 +672,7 @@
 	ti,non-removable;
 	bus-width = <4>;
 	cap-power-off-card;
+	keep-power-in-suspend;
 	pinctrl-names = "default";
 	pinctrl-0 = <&mmc2_pins>;
 

From 351b7c490700747d1dba1b0a10fbfe3448d11c35 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 22 Mar 2017 11:01:48 -0700
Subject: [PATCH 074/410] ARM: omap2+: Revert omap-smp.c changes resetting CPU1
 during boot

Commit 3251885285e1 ("ARM: OMAP4+: Reset CPU1 properly for kexec") started
unconditionally resetting CPU1 because of a kexec boot issue I was seeing
earlier on omap4 when doing kexec boot between two different kernel
versions.

This caused issues on some systems. We should only reset CPU1 as a last
resort option, and try to avoid it where possible. Doing an unconditional
CPU1 reset causes issues for example when booting a bootloader configured
secure OS running on CPU1 as reported by Andrew F. Davis <afd@ti.com>.

We can't completely remove the reset of CPU1 as it would break kexec
booting from older kernels. But we can limit the CPU1 reset to cases
where CPU1 is wrongly parked within the memory area used by the booting
kernel. Then later on we can add support for parking CPU1 for kexec out
of the SDRAM back to bootrom.

So let's first fix the regression reported by Andrew by making CPU1 reset
conditional. To do this, we need to:

1. Save configured AUX_CORE_BOOT_1 for later

2. Modify AUX_CORE_BOOT_0 reading code to for HS SoCs to return
   the whole register instead of the CPU mask

3. Check if CPU1 is wrongly parked into the booting kernel by the
   previous kernel and reset if needed

Fixes: 3251885285e1 ("ARM: OMAP4+: Reset CPU1 properly for kexec")
Reported-by: Andrew F. Davis <afd@ti.com>
Cc: Andrew F. Davis <afd@ti.com>
Cc: Keerthy <j-keerthy@ti.com>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Santosh Shilimkar <ssantosh@kernel.org>
Cc: Tero Kristo <t-kristo@ti.com>
Tested-by: Keerthy <j-keerthy@ti.com>
Tested-by: Andrew F. Davis <afd@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/common.h              |  1 +
 arch/arm/mach-omap2/omap-hotplug.c        |  2 +-
 arch/arm/mach-omap2/omap-mpuss-lowpower.c | 22 +++++-
 arch/arm/mach-omap2/omap-smc.S            |  1 -
 arch/arm/mach-omap2/omap-smp.c            | 90 +++++++++++++++++++----
 5 files changed, 96 insertions(+), 20 deletions(-)

diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h
index c4f2ace91ea2..3089d3bfa19b 100644
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@@ -270,6 +270,7 @@ extern const struct smp_operations omap4_smp_ops;
 extern int omap4_mpuss_init(void);
 extern int omap4_enter_lowpower(unsigned int cpu, unsigned int power_state);
 extern int omap4_hotplug_cpu(unsigned int cpu, unsigned int power_state);
+extern u32 omap4_get_cpu1_ns_pa_addr(void);
 #else
 static inline int omap4_enter_lowpower(unsigned int cpu,
 					unsigned int power_state)
diff --git a/arch/arm/mach-omap2/omap-hotplug.c b/arch/arm/mach-omap2/omap-hotplug.c
index d3fb5661bb5d..433db6d0b073 100644
--- a/arch/arm/mach-omap2/omap-hotplug.c
+++ b/arch/arm/mach-omap2/omap-hotplug.c
@@ -50,7 +50,7 @@ void omap4_cpu_die(unsigned int cpu)
 		omap4_hotplug_cpu(cpu, PWRDM_POWER_OFF);
 
 		if (omap_secure_apis_support())
-			boot_cpu = omap_read_auxcoreboot0();
+			boot_cpu = omap_read_auxcoreboot0() >> 9;
 		else
 			boot_cpu =
 				readl_relaxed(base + OMAP_AUX_CORE_BOOT_0) >> 5;
diff --git a/arch/arm/mach-omap2/omap-mpuss-lowpower.c b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
index 113ab2dd2ee9..03ec6d307c82 100644
--- a/arch/arm/mach-omap2/omap-mpuss-lowpower.c
+++ b/arch/arm/mach-omap2/omap-mpuss-lowpower.c
@@ -64,6 +64,7 @@
 #include "prm-regbits-44xx.h"
 
 static void __iomem *sar_base;
+static u32 old_cpu1_ns_pa_addr;
 
 #if defined(CONFIG_PM) && defined(CONFIG_SMP)
 
@@ -212,6 +213,11 @@ static void __init save_l2x0_context(void)
 {}
 #endif
 
+u32 omap4_get_cpu1_ns_pa_addr(void)
+{
+	return old_cpu1_ns_pa_addr;
+}
+
 /**
  * omap4_enter_lowpower: OMAP4 MPUSS Low Power Entry Function
  * The purpose of this function is to manage low power programming
@@ -460,22 +466,30 @@ int __init omap4_mpuss_init(void)
 void __init omap4_mpuss_early_init(void)
 {
 	unsigned long startup_pa;
+	void __iomem *ns_pa_addr;
 
-	if (!(cpu_is_omap44xx() || soc_is_omap54xx()))
+	if (!(soc_is_omap44xx() || soc_is_omap54xx()))
 		return;
 
 	sar_base = omap4_get_sar_ram_base();
 
-	if (cpu_is_omap443x())
+	/* Save old NS_PA_ADDR for validity checks later on */
+	if (soc_is_omap44xx())
+		ns_pa_addr = sar_base + CPU1_WAKEUP_NS_PA_ADDR_OFFSET;
+	else
+		ns_pa_addr = sar_base + OMAP5_CPU1_WAKEUP_NS_PA_ADDR_OFFSET;
+	old_cpu1_ns_pa_addr = readl_relaxed(ns_pa_addr);
+
+	if (soc_is_omap443x())
 		startup_pa = __pa_symbol(omap4_secondary_startup);
-	else if (cpu_is_omap446x())
+	else if (soc_is_omap446x())
 		startup_pa = __pa_symbol(omap4460_secondary_startup);
 	else if ((__boot_cpu_mode & MODE_MASK) == HYP_MODE)
 		startup_pa = __pa_symbol(omap5_secondary_hyp_startup);
 	else
 		startup_pa = __pa_symbol(omap5_secondary_startup);
 
-	if (cpu_is_omap44xx())
+	if (soc_is_omap44xx())
 		writel_relaxed(startup_pa, sar_base +
 			       CPU1_WAKEUP_NS_PA_ADDR_OFFSET);
 	else
diff --git a/arch/arm/mach-omap2/omap-smc.S b/arch/arm/mach-omap2/omap-smc.S
index fd90125bffc7..72506e6cf9e7 100644
--- a/arch/arm/mach-omap2/omap-smc.S
+++ b/arch/arm/mach-omap2/omap-smc.S
@@ -94,6 +94,5 @@ ENTRY(omap_read_auxcoreboot0)
 	ldr	r12, =0x103
 	dsb
 	smc	#0
-	mov	r0, r0, lsr #9
 	ldmfd   sp!, {r2-r12, pc}
 ENDPROC(omap_read_auxcoreboot0)
diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c
index 003353b0b794..3faf454ba487 100644
--- a/arch/arm/mach-omap2/omap-smp.c
+++ b/arch/arm/mach-omap2/omap-smp.c
@@ -21,6 +21,7 @@
 #include <linux/io.h>
 #include <linux/irqchip/arm-gic.h>
 
+#include <asm/sections.h>
 #include <asm/smp_scu.h>
 #include <asm/virt.h>
 
@@ -40,10 +41,14 @@
 
 #define OMAP5_CORE_COUNT	0x2
 
+#define AUX_CORE_BOOT0_GP_RELEASE	0x020
+#define AUX_CORE_BOOT0_HS_RELEASE	0x200
+
 struct omap_smp_config {
 	unsigned long cpu1_rstctrl_pa;
 	void __iomem *cpu1_rstctrl_va;
 	void __iomem *scu_base;
+	void __iomem *wakeupgen_base;
 	void *startup_addr;
 };
 
@@ -140,7 +145,6 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
 	static struct clockdomain *cpu1_clkdm;
 	static bool booted;
 	static struct powerdomain *cpu1_pwrdm;
-	void __iomem *base = omap_get_wakeupgen_base();
 
 	/*
 	 * Set synchronisation state between this boot processor
@@ -155,9 +159,11 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle)
 	 * A barrier is added to ensure that write buffer is drained
 	 */
 	if (omap_secure_apis_support())
-		omap_modify_auxcoreboot0(0x200, 0xfffffdff);
+		omap_modify_auxcoreboot0(AUX_CORE_BOOT0_HS_RELEASE,
+					 0xfffffdff);
 	else
-		writel_relaxed(0x20, base + OMAP_AUX_CORE_BOOT_0);
+		writel_relaxed(AUX_CORE_BOOT0_GP_RELEASE,
+			       cfg.wakeupgen_base + OMAP_AUX_CORE_BOOT_0);
 
 	if (!cpu1_clkdm && !cpu1_pwrdm) {
 		cpu1_clkdm = clkdm_lookup("mpu1_clkdm");
@@ -261,9 +267,72 @@ static void __init omap4_smp_init_cpus(void)
 		set_cpu_possible(i, true);
 }
 
+/*
+ * For now, just make sure the start-up address is not within the booting
+ * kernel space as that means we just overwrote whatever secondary_startup()
+ * code there was.
+ */
+static bool __init omap4_smp_cpu1_startup_valid(unsigned long addr)
+{
+	if ((addr >= __pa(PAGE_OFFSET)) && (addr <= __pa(__bss_start)))
+		return false;
+
+	return true;
+}
+
+/*
+ * We may need to reset CPU1 before configuring, otherwise kexec boot can end
+ * up trying to use old kernel startup address or suspend-resume will
+ * occasionally fail to bring up CPU1 on 4430 if CPU1 fails to enter deeper
+ * idle states.
+ */
+static void __init omap4_smp_maybe_reset_cpu1(struct omap_smp_config *c)
+{
+	unsigned long cpu1_startup_pa, cpu1_ns_pa_addr;
+	bool needs_reset = false;
+	u32 released;
+
+	if (omap_secure_apis_support())
+		released = omap_read_auxcoreboot0() & AUX_CORE_BOOT0_HS_RELEASE;
+	else
+		released = readl_relaxed(cfg.wakeupgen_base +
+					 OMAP_AUX_CORE_BOOT_0) &
+						AUX_CORE_BOOT0_GP_RELEASE;
+	if (released) {
+		pr_warn("smp: CPU1 not parked?\n");
+
+		return;
+	}
+
+	cpu1_startup_pa = readl_relaxed(cfg.wakeupgen_base +
+					OMAP_AUX_CORE_BOOT_1);
+	cpu1_ns_pa_addr = omap4_get_cpu1_ns_pa_addr();
+
+	/* Did the configured secondary_startup() get overwritten? */
+	if (!omap4_smp_cpu1_startup_valid(cpu1_startup_pa))
+		needs_reset = true;
+
+	/*
+	 * If omap4 or 5 has NS_PA_ADDR configured, CPU1 may be in a
+	 * deeper idle state in WFI and will wake to an invalid address.
+	 */
+	if ((soc_is_omap44xx() || soc_is_omap54xx()) &&
+	    !omap4_smp_cpu1_startup_valid(cpu1_ns_pa_addr))
+		needs_reset = true;
+
+	if (!needs_reset || !c->cpu1_rstctrl_va)
+		return;
+
+	pr_info("smp: CPU1 parked within kernel, needs reset (0x%lx 0x%lx)\n",
+		cpu1_startup_pa, cpu1_ns_pa_addr);
+
+	writel_relaxed(1, c->cpu1_rstctrl_va);
+	readl_relaxed(c->cpu1_rstctrl_va);
+	writel_relaxed(0, c->cpu1_rstctrl_va);
+}
+
 static void __init omap4_smp_prepare_cpus(unsigned int max_cpus)
 {
-	void __iomem *base = omap_get_wakeupgen_base();
 	const struct omap_smp_config *c = NULL;
 
 	if (soc_is_omap443x())
@@ -281,6 +350,7 @@ static void __init omap4_smp_prepare_cpus(unsigned int max_cpus)
 	/* Must preserve cfg.scu_base set earlier */
 	cfg.cpu1_rstctrl_pa = c->cpu1_rstctrl_pa;
 	cfg.startup_addr = c->startup_addr;
+	cfg.wakeupgen_base = omap_get_wakeupgen_base();
 
 	if (soc_is_dra74x() || soc_is_omap54xx()) {
 		if ((__boot_cpu_mode & MODE_MASK) == HYP_MODE)
@@ -299,15 +369,7 @@ static void __init omap4_smp_prepare_cpus(unsigned int max_cpus)
 	if (cfg.scu_base)
 		scu_enable(cfg.scu_base);
 
-	/*
-	 * Reset CPU1 before configuring, otherwise kexec will
-	 * end up trying to use old kernel startup address.
-	 */
-	if (cfg.cpu1_rstctrl_va) {
-		writel_relaxed(1, cfg.cpu1_rstctrl_va);
-		readl_relaxed(cfg.cpu1_rstctrl_va);
-		writel_relaxed(0, cfg.cpu1_rstctrl_va);
-	}
+	omap4_smp_maybe_reset_cpu1(&cfg);
 
 	/*
 	 * Write the address of secondary startup routine into the
@@ -319,7 +381,7 @@ static void __init omap4_smp_prepare_cpus(unsigned int max_cpus)
 		omap_auxcoreboot_addr(__pa_symbol(cfg.startup_addr));
 	else
 		writel_relaxed(__pa_symbol(cfg.startup_addr),
-			       base + OMAP_AUX_CORE_BOOT_1);
+			       cfg.wakeupgen_base + OMAP_AUX_CORE_BOOT_1);
 }
 
 const struct smp_operations omap4_smp_ops __initconst = {

From a431ecd2d459da3c91a612061f09eb422ffe78e2 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 27 Mar 2017 13:52:00 -0400
Subject: [PATCH 075/410] Revert "pata_atiixp: Don't use unconnected secondary
 port on SB600/SB700"

This reverts commit 5946fdaee4ba449e8fbb5d403e1ed69437f916e8.

The original commit's assumption that the secondary port is
unconnected turns out to be false.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Markku Pesonen <tourula@gmail.com>
Fixes: 5946fdaee4ba ("pata_atiixp: Don't use unconnected secondary port on SB600/SB700")
Cc: Darren Stevens <darren@stevens-zone.net>
---
 drivers/ata/pata_atiixp.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c
index 6c9aa95a9a05..49d705c9f0f7 100644
--- a/drivers/ata/pata_atiixp.c
+++ b/drivers/ata/pata_atiixp.c
@@ -278,11 +278,6 @@ static int atiixp_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	};
 	const struct ata_port_info *ppi[] = { &info, &info };
 
-	/* SB600/700 don't have secondary port wired */
-	if ((pdev->device == PCI_DEVICE_ID_ATI_IXP600_IDE) ||
-		(pdev->device == PCI_DEVICE_ID_ATI_IXP700_IDE))
-		ppi[1] = &ata_dummy_port_info;
-
 	return ata_pci_bmdma_init_one(pdev, ppi, &atiixp_sht, NULL,
 				      ATA_HOST_PARALLEL_SCAN);
 }

From ab6434a1377a768a1e6d3e6cf819eb21724a99c2 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Mon, 27 Mar 2017 14:30:06 -0400
Subject: [PATCH 076/410] audit: move audit_signal_info() into kernel/auditsc.c

Commit 5b52330bbfe6 ("audit: fix auditd/kernel connection state
tracking") made inlining audit_signal_info() a bit pointless as
it was always calling into auditd_test_task() so let's remove the
inline function in kernel/audit.h and convert __audit_signal_info()
in kernel/auditsc.c into audit_signal_info().

Reviewed-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 kernel/audit.h   |  8 +-------
 kernel/auditsc.c | 25 +++++++++++++------------
 2 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/kernel/audit.h b/kernel/audit.h
index 0f1cf6d1878a..0d87f8ab8778 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -333,13 +333,7 @@ extern u32 audit_sig_sid;
 extern int audit_filter(int msgtype, unsigned int listtype);
 
 #ifdef CONFIG_AUDITSYSCALL
-extern int __audit_signal_info(int sig, struct task_struct *t);
-static inline int audit_signal_info(int sig, struct task_struct *t)
-{
-	if (auditd_test_task(t) || (audit_signals && !audit_dummy_context()))
-		return __audit_signal_info(sig, t);
-	return 0;
-}
+extern int audit_signal_info(int sig, struct task_struct *t);
 extern void audit_filter_inodes(struct task_struct *, struct audit_context *);
 extern struct list_head *audit_killed_trees(void);
 #else
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e59ffc7fc522..1c2333155893 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2249,26 +2249,27 @@ void __audit_ptrace(struct task_struct *t)
  * If the audit subsystem is being terminated, record the task (pid)
  * and uid that is doing that.
  */
-int __audit_signal_info(int sig, struct task_struct *t)
+int audit_signal_info(int sig, struct task_struct *t)
 {
 	struct audit_aux_data_pids *axp;
 	struct task_struct *tsk = current;
 	struct audit_context *ctx = tsk->audit_context;
 	kuid_t uid = current_uid(), t_uid = task_uid(t);
 
-	if (auditd_test_task(t)) {
-		if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) {
-			audit_sig_pid = task_tgid_nr(tsk);
-			if (uid_valid(tsk->loginuid))
-				audit_sig_uid = tsk->loginuid;
-			else
-				audit_sig_uid = uid;
-			security_task_getsecid(tsk, &audit_sig_sid);
-		}
-		if (!audit_signals || audit_dummy_context())
-			return 0;
+	if (auditd_test_task(t) &&
+	    (sig == SIGTERM || sig == SIGHUP ||
+	     sig == SIGUSR1 || sig == SIGUSR2)) {
+		audit_sig_pid = task_tgid_nr(tsk);
+		if (uid_valid(tsk->loginuid))
+			audit_sig_uid = tsk->loginuid;
+		else
+			audit_sig_uid = uid;
+		security_task_getsecid(tsk, &audit_sig_sid);
 	}
 
+	if (!audit_signals || audit_dummy_context())
+		return 0;
+
 	/* optimize the common case by putting first signal recipient directly
 	 * in audit_context */
 	if (!ctx->target_pid) {

From 248ccd5ee644fcf68984d945cffcdc364992ddbf Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 27 Mar 2017 10:48:11 -0700
Subject: [PATCH 077/410] MAINTAINERS: Add Andrew Lunn as co-maintainer of
 PHYLIB

Andrew has been contributing a lot to PHYLIB over the past months and
his feedback on patches is more than welcome.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index e48678d15401..9975dcefc372 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4922,6 +4922,7 @@ F:	include/linux/netfilter_bridge/
 F:	net/bridge/
 
 ETHERNET PHY LIBRARY
+M:	Andrew Lunn <andrew@lunn.ch>
 M:	Florian Fainelli <f.fainelli@gmail.com>
 L:	netdev@vger.kernel.org
 S:	Maintained

From ffefb6f4d6ad699a2b5484241bc46745a53235d0 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 27 Mar 2017 18:00:14 +0100
Subject: [PATCH 078/410] net: ipconfig: fix ic_close_devs() use-after-free

Our chosen ic_dev may be anywhere in our list of ic_devs, and we may
free it before attempting to close others. When we compare d->dev and
ic_dev->dev, we're potentially dereferencing memory returned to the
allocator. This causes KASAN to scream for each subsequent ic_dev we
check.

As there's a 1-1 mapping between ic_devs and netdevs, we can instead
compare d and ic_dev directly, which implicitly handles the !ic_dev
case, and avoids the use-after-free. The ic_dev pointer may be stale,
but we will not dereference it.

Original splat:

[    6.487446] ==================================================================
[    6.494693] BUG: KASAN: use-after-free in ic_close_devs+0xc4/0x154 at addr ffff800367efa708
[    6.503013] Read of size 8 by task swapper/0/1
[    6.507452] CPU: 5 PID: 1 Comm: swapper/0 Not tainted 4.11.0-rc3-00002-gda42158 #8
[    6.514993] Hardware name: AppliedMicro Mustang/Mustang, BIOS 3.05.05-beta_rc Jan 27 2016
[    6.523138] Call trace:
[    6.525590] [<ffff200008094778>] dump_backtrace+0x0/0x570
[    6.530976] [<ffff200008094d08>] show_stack+0x20/0x30
[    6.536017] [<ffff200008bee928>] dump_stack+0x120/0x188
[    6.541231] [<ffff20000856d5e4>] kasan_object_err+0x24/0xa0
[    6.546790] [<ffff20000856d924>] kasan_report_error+0x244/0x738
[    6.552695] [<ffff20000856dfec>] __asan_report_load8_noabort+0x54/0x80
[    6.559204] [<ffff20000aae86ac>] ic_close_devs+0xc4/0x154
[    6.564590] [<ffff20000aaedbac>] ip_auto_config+0x2ed4/0x2f1c
[    6.570321] [<ffff200008084b04>] do_one_initcall+0xcc/0x370
[    6.575882] [<ffff20000aa31de8>] kernel_init_freeable+0x5f8/0x6c4
[    6.581959] [<ffff20000a16df00>] kernel_init+0x18/0x190
[    6.587171] [<ffff200008084710>] ret_from_fork+0x10/0x40
[    6.592468] Object at ffff800367efa700, in cache kmalloc-128 size: 128
[    6.598969] Allocated:
[    6.601324] PID = 1
[    6.603427]  save_stack_trace_tsk+0x0/0x418
[    6.607603]  save_stack_trace+0x20/0x30
[    6.611430]  kasan_kmalloc+0xd8/0x188
[    6.615087]  ip_auto_config+0x8c4/0x2f1c
[    6.619002]  do_one_initcall+0xcc/0x370
[    6.622832]  kernel_init_freeable+0x5f8/0x6c4
[    6.627178]  kernel_init+0x18/0x190
[    6.630660]  ret_from_fork+0x10/0x40
[    6.634223] Freed:
[    6.636233] PID = 1
[    6.638334]  save_stack_trace_tsk+0x0/0x418
[    6.642510]  save_stack_trace+0x20/0x30
[    6.646337]  kasan_slab_free+0x88/0x178
[    6.650167]  kfree+0xb8/0x478
[    6.653131]  ic_close_devs+0x130/0x154
[    6.656875]  ip_auto_config+0x2ed4/0x2f1c
[    6.660875]  do_one_initcall+0xcc/0x370
[    6.664705]  kernel_init_freeable+0x5f8/0x6c4
[    6.669051]  kernel_init+0x18/0x190
[    6.672534]  ret_from_fork+0x10/0x40
[    6.676098] Memory state around the buggy address:
[    6.680880]  ffff800367efa600: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[    6.688078]  ffff800367efa680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[    6.695276] >ffff800367efa700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[    6.702469]                       ^
[    6.705952]  ffff800367efa780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[    6.713149]  ffff800367efa800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[    6.720343] ==================================================================
[    6.727536] Disabling lock debugging due to kernel taint

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Cc: David S. Miller <davem@davemloft.net>
Cc: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Cc: James Morris <jmorris@namei.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipconfig.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index fd9f34bbd740..dfb2ab2dd3c8 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -306,7 +306,7 @@ static void __init ic_close_devs(void)
 	while ((d = next)) {
 		next = d->next;
 		dev = d->dev;
-		if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) {
+		if (d != ic_dev && !netdev_uses_dsa(dev)) {
 			pr_debug("IP-Config: Downing %s\n", dev->name);
 			dev_change_flags(dev, d->flags);
 		}

From 59f1183dd368f12c0a80da3c91a4a42afa4e1d38 Mon Sep 17 00:00:00 2001
From: Nitin Gupta <nitin.m.gupta@oracle.com>
Date: Fri, 3 Mar 2017 14:40:44 -0800
Subject: [PATCH 079/410] sparc64: Fix size check in huge_pte_alloc

Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/mm/hugetlbpage.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 323bc6b6e3ad..30168500603e 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -261,7 +261,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 		if (!pmd)
 			return NULL;
 
-		if (sz == PMD_SHIFT)
+		if (sz >= PMD_SIZE)
 			pte = (pte_t *)pmd;
 		else
 			pte = pte_alloc_map(mm, pmd, addr);

From 85b1da7c47052330af9485a5f5c7e54ede882e65 Mon Sep 17 00:00:00 2001
From: Nitin Gupta <nitin.m.gupta@oracle.com>
Date: Thu, 9 Mar 2017 14:22:23 -0800
Subject: [PATCH 080/410] sparc64: Add support for 2G hugepages

Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/page_64.h | 3 ++-
 arch/sparc/mm/hugetlbpage.c      | 7 +++++++
 arch/sparc/mm/init_64.c          | 4 ++++
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index f294dd42fc7d..5961b2d8398a 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -17,6 +17,7 @@
 
 #define HPAGE_SHIFT		23
 #define REAL_HPAGE_SHIFT	22
+#define HPAGE_2GB_SHIFT		31
 #define HPAGE_256MB_SHIFT	28
 #define HPAGE_64K_SHIFT		16
 #define REAL_HPAGE_SIZE		(_AC(1,UL) << REAL_HPAGE_SHIFT)
@@ -27,7 +28,7 @@
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 #define REAL_HPAGE_PER_HPAGE	(_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
-#define HUGE_MAX_HSTATE		3
+#define HUGE_MAX_HSTATE		4
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 30168500603e..ee5273ad918d 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -143,6 +143,10 @@ static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
 	pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;
 
 	switch (shift) {
+	case HPAGE_2GB_SHIFT:
+		hugepage_size = _PAGE_SZ2GB_4V;
+		pte_val(entry) |= _PAGE_PMD_HUGE;
+		break;
 	case HPAGE_256MB_SHIFT:
 		hugepage_size = _PAGE_SZ256MB_4V;
 		pte_val(entry) |= _PAGE_PMD_HUGE;
@@ -183,6 +187,9 @@ static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
 	unsigned int shift;
 
 	switch (tte_szbits) {
+	case _PAGE_SZ2GB_4V:
+		shift = HPAGE_2GB_SHIFT;
+		break;
 	case _PAGE_SZ256MB_4V:
 		shift = HPAGE_256MB_SHIFT;
 		break;
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index ccd455328989..3328043e990c 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -337,6 +337,10 @@ static int __init setup_hugepagesz(char *string)
 	hugepage_shift = ilog2(hugepage_size);
 
 	switch (hugepage_shift) {
+	case HPAGE_2GB_SHIFT:
+		hv_pgsz_mask = HV_PGSZ_MASK_2GB;
+		hv_pgsz_idx = HV_PGSZ_IDX_2GB;
+		break;
 	case HPAGE_256MB_SHIFT:
 		hv_pgsz_mask = HV_PGSZ_MASK_256MB;
 		hv_pgsz_idx = HV_PGSZ_IDX_256MB;

From adfae8a5d833fa2b46577a8081f350e408851f5b Mon Sep 17 00:00:00 2001
From: bob picco <bob.picco@oracle.com>
Date: Fri, 10 Mar 2017 14:31:19 -0500
Subject: [PATCH 081/410] sparc64: kern_addr_valid regression

I encountered this bug when using /proc/kcore to examine the kernel. Plus a
coworker inquired about debugging tools. We computed pa but did
not use it during the maximum physical address bits test. Instead we used
the identity mapped virtual address which will always fail this test.

I believe the defect came in here:
[bpicco@zareason linus.git]$ git describe --contains bb4e6e85daa52
v3.18-rc1~87^2~4
.

Signed-off-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/mm/init_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 3328043e990c..0cda653ae007 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1567,7 +1567,7 @@ bool kern_addr_valid(unsigned long addr)
 	if ((long)addr < 0L) {
 		unsigned long pa = __pa(addr);
 
-		if ((addr >> max_phys_bits) != 0UL)
+		if ((pa >> max_phys_bits) != 0UL)
 			return false;
 
 		return pfn_valid(pa >> PAGE_SHIFT);

From 0ae2d26ffe70c32d4a7fe77593f0a55ce416c09e Mon Sep 17 00:00:00 2001
From: Babu Moger <babu.moger@oracle.com>
Date: Fri, 17 Mar 2017 14:52:21 -0600
Subject: [PATCH 082/410] arch/sparc: Avoid DCTI Couples
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avoid un-intended DCTI Couples. Use of DCTI couples is deprecated.
Also address the "Programming Note" for optimal performance.

Here is the complete text from Oracle SPARC Architecture Specs.

6.3.4.7 DCTI Couples
"A delayed control transfer instruction (DCTI) in the delay slot of
another DCTI is referred to as a “DCTI couple”. The use of DCTI couples
is deprecated in the Oracle SPARC Architecture; no new software should
place a DCTI in the delay slot of another DCTI, because on future Oracle
SPARC Architecture implementations DCTI couples may execute either
slowly or differently than the programmer assumes it will.

SPARC V8 and SPARC V9 Compatibility Note
The SPARC V8 architecture left behavior undefined for a DCTI couple. The
SPARC V9 architecture defined behavior in that case, but as of
UltraSPARC Architecture 2005, use of DCTI couples was deprecated.
Software should not expect high performance from DCTI couples, and
performance of DCTI couples should be expected to decline further in
future processors.

Programming Note
As noted in TABLE 6-5 on page 115, an annulled branch-always
(branch-always with a = 1) instruction is not architecturally a DCTI.
However, since not all implementations make that distinction, for
optimal performance, a DCTI should not be placed in the instruction word
immediately following an annulled branch-always instruction (BA,A or
BPA,A)."

Signed-off-by: Babu Moger <babu.moger@oracle.com>
Reviewed-by: Rob Gardner <rob.gardner@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/head_64.S        | 4 ++++
 arch/sparc/kernel/misctrap.S       | 1 +
 arch/sparc/kernel/rtrap_64.S       | 1 +
 arch/sparc/kernel/spiterrs.S       | 1 +
 arch/sparc/kernel/sun4v_tlb_miss.S | 1 +
 arch/sparc/kernel/urtt_fill.S      | 1 +
 arch/sparc/kernel/winfixup.S       | 2 ++
 arch/sparc/lib/NG2memcpy.S         | 4 ++++
 arch/sparc/lib/NG4memcpy.S         | 1 +
 arch/sparc/lib/NG4memset.S         | 1 +
 arch/sparc/lib/NGmemcpy.S          | 1 +
 11 files changed, 18 insertions(+)

diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 6aa3da152c20..44101196d02b 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -96,6 +96,7 @@ sparc64_boot:
 	andn	%g1, PSTATE_AM, %g1
 	wrpr	%g1, 0x0, %pstate
 	ba,a,pt	%xcc, 1f
+	 nop
 
 	.globl	prom_finddev_name, prom_chosen_path, prom_root_node
 	.globl	prom_getprop_name, prom_mmu_name, prom_peer_name
@@ -613,6 +614,7 @@ niagara_tlb_fixup:
 	 nop
 
 	ba,a,pt	%xcc, 80f
+	 nop
 niagara4_patch:
 	call	niagara4_patch_copyops
 	 nop
@@ -622,6 +624,7 @@ niagara4_patch:
 	 nop
 
 	ba,a,pt	%xcc, 80f
+	 nop
 
 niagara2_patch:
 	call	niagara2_patch_copyops
@@ -632,6 +635,7 @@ niagara2_patch:
 	 nop
 
 	ba,a,pt	%xcc, 80f
+	 nop
 
 niagara_patch:
 	call	niagara_patch_copyops
diff --git a/arch/sparc/kernel/misctrap.S b/arch/sparc/kernel/misctrap.S
index 34b4933900bf..9276d2f0dd86 100644
--- a/arch/sparc/kernel/misctrap.S
+++ b/arch/sparc/kernel/misctrap.S
@@ -82,6 +82,7 @@ do_stdfmna:
 	call		handle_stdfmna
 	 add		%sp, PTREGS_OFF, %o0
 	ba,a,pt		%xcc, rtrap
+	 nop
 	.size		do_stdfmna,.-do_stdfmna
 
 	.type		breakpoint_trap,#function
diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S
index 216948ca4382..709a82ebd294 100644
--- a/arch/sparc/kernel/rtrap_64.S
+++ b/arch/sparc/kernel/rtrap_64.S
@@ -237,6 +237,7 @@ rt_continue:	ldx			[%sp + PTREGS_OFF + PT_V9_G1], %g1
 		bne,pt			%xcc, user_rtt_fill_32bit
 		 wrpr			%g1, %cwp
 		ba,a,pt			%xcc, user_rtt_fill_64bit
+		 nop
 
 user_rtt_fill_fixup_dax:
 		ba,pt	%xcc, user_rtt_fill_fixup_common
diff --git a/arch/sparc/kernel/spiterrs.S b/arch/sparc/kernel/spiterrs.S
index 4a73009f66a5..d7e540842809 100644
--- a/arch/sparc/kernel/spiterrs.S
+++ b/arch/sparc/kernel/spiterrs.S
@@ -86,6 +86,7 @@ __spitfire_cee_trap_continue:
 	 rd		%pc, %g7
 
 	ba,a,pt		%xcc, 2f
+	 nop
 
 1:	ba,pt		%xcc, etrap_irq
 	 rd		%pc, %g7
diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S
index 6179e19bc9b9..c19f352f46c7 100644
--- a/arch/sparc/kernel/sun4v_tlb_miss.S
+++ b/arch/sparc/kernel/sun4v_tlb_miss.S
@@ -352,6 +352,7 @@ sun4v_mna:
 	call	sun4v_do_mna
 	 add	%sp, PTREGS_OFF, %o0
 	ba,a,pt	%xcc, rtrap
+	 nop
 
 	/* Privileged Action.  */
 sun4v_privact:
diff --git a/arch/sparc/kernel/urtt_fill.S b/arch/sparc/kernel/urtt_fill.S
index 5604a2b051d4..364af3250646 100644
--- a/arch/sparc/kernel/urtt_fill.S
+++ b/arch/sparc/kernel/urtt_fill.S
@@ -92,6 +92,7 @@ user_rtt_fill_fixup_common:
 		call	sun4v_data_access_exception
 		 nop
 		ba,a,pt	%xcc, rtrap
+		 nop
 
 1:		call	spitfire_data_access_exception
 		 nop
diff --git a/arch/sparc/kernel/winfixup.S b/arch/sparc/kernel/winfixup.S
index 855019a8590e..1ee173cc3c39 100644
--- a/arch/sparc/kernel/winfixup.S
+++ b/arch/sparc/kernel/winfixup.S
@@ -152,6 +152,8 @@ fill_fixup_dax:
 	call	sun4v_data_access_exception
 	 nop
 	ba,a,pt	%xcc, rtrap
+	 nop
 1:	call	spitfire_data_access_exception
 	 nop
 	ba,a,pt	%xcc, rtrap
+	 nop
diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S
index c629dbd121b6..64dcd6cdb606 100644
--- a/arch/sparc/lib/NG2memcpy.S
+++ b/arch/sparc/lib/NG2memcpy.S
@@ -326,11 +326,13 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	blu		170f
 	 nop
 	ba,a,pt		%xcc, 180f
+	 nop
 
 4:	/* 32 <= low bits < 48 */
 	blu		150f
 	 nop
 	ba,a,pt		%xcc, 160f
+	 nop
 5:	/* 0 < low bits < 32 */
 	blu,a		6f
 	 cmp		%g2, 8
@@ -338,6 +340,7 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	blu		130f
 	 nop
 	ba,a,pt		%xcc, 140f
+	 nop
 6:	/* 0 < low bits < 16 */
 	bgeu		120f
 	 nop
@@ -475,6 +478,7 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	brz,pt		%o2, 85f
 	 sub		%o0, %o1, GLOBAL_SPARE
 	ba,a,pt		%XCC, 90f
+	 nop
 
 	.align		64
 75: /* 16 < len <= 64 */
diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
index 75bb93b1437f..78ea962edcbe 100644
--- a/arch/sparc/lib/NG4memcpy.S
+++ b/arch/sparc/lib/NG4memcpy.S
@@ -530,4 +530,5 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	bne,pt		%icc, 1b
 	 EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
 	ba,a,pt		%icc, .Lexit
+	 nop
 	.size		FUNC_NAME, .-FUNC_NAME
diff --git a/arch/sparc/lib/NG4memset.S b/arch/sparc/lib/NG4memset.S
index 41da4bdd95cb..7c0c81f18837 100644
--- a/arch/sparc/lib/NG4memset.S
+++ b/arch/sparc/lib/NG4memset.S
@@ -102,4 +102,5 @@ NG4bzero:
 	bne,pt		%icc, 1b
 	 add		%o0, 0x30, %o0
 	ba,a,pt		%icc, .Lpostloop
+	 nop
 	.size		NG4bzero,.-NG4bzero
diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S
index d88c4ed50a00..cd654a719b27 100644
--- a/arch/sparc/lib/NGmemcpy.S
+++ b/arch/sparc/lib/NGmemcpy.S
@@ -394,6 +394,7 @@ FUNC_NAME:	/* %i0=dst, %i1=src, %i2=len */
 	brz,pt		%i2, 85f
 	 sub		%o0, %i1, %i3
 	ba,a,pt		%XCC, 90f
+	 nop
 
 	.align		64
 70: /* 16 < len <= 64 */

From b03b99a329a14b7302f37c3ea6da3848db41c8c5 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 27 Mar 2017 21:53:38 -0700
Subject: [PATCH 083/410] acpi, nfit, libnvdimm: fix interleave set cookie
 calculation (64-bit comparison)

While reviewing the -stable patch for commit 86ef58a4e35e "nfit,
libnvdimm: fix interleave set cookie calculation" Ben noted:

    "This is returning an int, thus it's effectively doing a 32-bit
     comparison and not the 64-bit comparison you say is needed."

Update the compare operation to be immune to this integer demotion problem.

Cc: <stable@vger.kernel.org>
Cc: Nicholas Moulin <nicholas.w.moulin@linux.intel.com>
Fixes: 86ef58a4e35e ("nfit, libnvdimm: fix interleave set cookie calculation")
Reported-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 662036bdc65e..c8ea9d698cd0 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1617,7 +1617,11 @@ static int cmp_map(const void *m0, const void *m1)
 	const struct nfit_set_info_map *map0 = m0;
 	const struct nfit_set_info_map *map1 = m1;
 
-	return map0->region_offset - map1->region_offset;
+	if (map0->region_offset < map1->region_offset)
+		return -1;
+	else if (map0->region_offset > map1->region_offset)
+		return 1;
+	return 0;
 }
 
 /* Retrieve the nth entry referencing this spa */

From cc66afea58f858ff6da7f79b8a595a67bbb4f9a9 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Mon, 27 Mar 2017 11:32:59 +0200
Subject: [PATCH 084/410] x86/mce: Don't print MCEs when mcelog is active

Since:

  cd9c57cad3fe ("x86/MCE: Dump MCE to dmesg if no consumers")

all MCEs are printed even when mcelog is running. Fix the regression to
not print to dmesg when mcelog is running as it is a consumer too.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
[ Massage commit message. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: stable@vger.kernel.org # 4.10..
Fixes: cd9c57cad3fe ("x86/MCE: Dump MCE to dmesg if no consumers")
Link: http://lkml.kernel.org/r/20170327093304.10683-2-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8e9725c607ea..5accfbdee3f0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -54,6 +54,8 @@
 
 static DEFINE_MUTEX(mce_chrdev_read_mutex);
 
+static int mce_chrdev_open_count;	/* #times opened */
+
 #define mce_log_get_idx_check(p) \
 ({ \
 	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
@@ -598,6 +600,10 @@ static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
 	if (atomic_read(&num_notifiers) > 2)
 		return NOTIFY_DONE;
 
+	/* Don't print when mcelog is running */
+	if (mce_chrdev_open_count > 0)
+		return NOTIFY_DONE;
+
 	__print_mce(m);
 
 	return NOTIFY_DONE;
@@ -1828,7 +1834,6 @@ void mcheck_cpu_clear(struct cpuinfo_x86 *c)
  */
 
 static DEFINE_SPINLOCK(mce_chrdev_state_lock);
-static int mce_chrdev_open_count;	/* #times opened */
 static int mce_chrdev_open_exclu;	/* already open exclusive? */
 
 static int mce_chrdev_open(struct inode *inode, struct file *file)

From 7ed23e1bae8bf7e37fd555066550a00b95a3a98b Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Mon, 20 Mar 2017 17:49:03 +1100
Subject: [PATCH 085/410] powerpc: Disable HFSCR[TM] if TM is not supported

On Power8 & Power9 the early CPU inititialisation in __init_HFSCR()
turns on HFSCR[TM] (Hypervisor Facility Status and Control Register
[Transactional Memory]), but that doesn't take into account that TM
might be disabled by CPU features, or disabled by the kernel being built
with CONFIG_PPC_TRANSACTIONAL_MEM=n.

So later in boot, when we have setup the CPU features, clear HSCR[TM] if
the TM CPU feature has been disabled. We use CPU_FTR_TM_COMP to account
for the CONFIG_PPC_TRANSACTIONAL_MEM=n case.

Without this a KVM guest might try use TM, even if told not to, and
cause an oops in the host kernel. Typically the oops is seen in
__kvmppc_vcore_entry() and may or may not be fatal to the host, but is
always bad news.

In practice all shipping CPU revisions do support TM, and all host
kernels we are aware of build with TM support enabled, so no one should
actually be able to hit this in the wild.

Fixes: 2a3563b023e5 ("powerpc: Setup in HFSCR for POWER8")
Cc: stable@vger.kernel.org # v3.10+
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Tested-by: Sam Bobroff <sam.bobroff@au1.ibm.com>
[mpe: Rewrite change log with input from Sam, add Fixes/stable]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/setup_64.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 9cfaa8b69b5f..f997154dfc41 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -236,6 +236,15 @@ static void cpu_ready_for_interrupts(void)
 		mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
 	}
 
+	/*
+	 * Fixup HFSCR:TM based on CPU features. The bit is set by our
+	 * early asm init because at that point we haven't updated our
+	 * CPU features from firmware and device-tree. Here we have,
+	 * so let's do it.
+	 */
+	if (cpu_has_feature(CPU_FTR_HVMODE) && !cpu_has_feature(CPU_FTR_TM_COMP))
+		mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM);
+
 	/* Set IR and DR in PACA MSR */
 	get_paca()->kernel_msr = MSR_KERNEL;
 }

From f9ba3501d50317697811ff3c48f623f08d616fc8 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 27 Mar 2017 00:21:15 +0800
Subject: [PATCH 086/410] sctp: change to save MSG_MORE flag into assoc

David Laight noticed the support for MSG_MORE with datamsg->force_delay
didn't really work as we expected, as the first msg with MSG_MORE set
would always block the following chunks' dequeuing.

This Patch is to rewrite it by saving the MSG_MORE flag into assoc as
David Laight suggested.

asoc->force_delay is used to save MSG_MORE flag before a msg is sent.
All chunks in queue would not be sent out if asoc->force_delay is set
by the msg with MSG_MORE flag, until a new msg without MSG_MORE flag
clears asoc->force_delay.

Note that this change would not affect the flush is generated by other
triggers, like asoc->state != ESTABLISHED, queue size > pmtu etc.

v1->v2:
  Not clear asoc->force_delay after sending the msg with MSG_MORE flag.

Fixes: 4ea0c32f5f42 ("sctp: add support for MSG_MORE")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: David Laight <david.laight@aculab.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 2 +-
 net/sctp/output.c          | 2 +-
 net/sctp/socket.c          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 592decebac75..8caa5ee9e290 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -499,7 +499,6 @@ struct sctp_datamsg {
 	/* Did the messenge fail to send? */
 	int send_error;
 	u8 send_failed:1,
-	   force_delay:1,
 	   can_delay;	    /* should this message be Nagle delayed */
 };
 
@@ -1878,6 +1877,7 @@ struct sctp_association {
 
 	__u8 need_ecne:1,	/* Need to send an ECNE Chunk? */
 	     temp:1,		/* Is it a temporary association? */
+	     force_delay:1,
 	     prsctp_enable:1,
 	     reconf_enable:1;
 
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 1224421036b3..73fd178007a3 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -704,7 +704,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
 	 */
 
 	if ((sctp_sk(asoc->base.sk)->nodelay || inflight == 0) &&
-	    !chunk->msg->force_delay)
+	    !asoc->force_delay)
 		/* Nothing unacked */
 		return SCTP_XMIT_OK;
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 0f378ea2ae38..baa269a0d52e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1965,7 +1965,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 		err = PTR_ERR(datamsg);
 		goto out_free;
 	}
-	datamsg->force_delay = !!(msg->msg_flags & MSG_MORE);
+	asoc->force_delay = !!(msg->msg_flags & MSG_MORE);
 
 	/* Now send the (possibly) fragmented message. */
 	list_for_each_entry(chunk, &datamsg->chunks, frag_list) {

From af109a2cf6a9a6271fa420ae2d64d72d86c92b7d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 28 Mar 2017 12:11:07 +0200
Subject: [PATCH 087/410] isdn: kcapi: avoid uninitialized data

gcc-7 points out that the AVMB1_ADDCARD ioctl results in an unintialized
value ending up in the cardnr parameter:

drivers/isdn/capi/kcapi.c: In function 'old_capi_manufacturer':
drivers/isdn/capi/kcapi.c:1042:24: error: 'cdef.cardnr' may be used uninitialized in this function [-Werror=maybe-uninitialized]
   cparams.cardnr = cdef.cardnr;

This has been broken since before the start of the git history, so
either the value is not used for anything important, or the ioctl
command doesn't get called in practice.

Setting the cardnr to zero avoids the warning and makes sure
we have consistent behavior.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/capi/kcapi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c
index 1dfd1085a04f..9ca691d6c13b 100644
--- a/drivers/isdn/capi/kcapi.c
+++ b/drivers/isdn/capi/kcapi.c
@@ -1032,6 +1032,7 @@ static int old_capi_manufacturer(unsigned int cmd, void __user *data)
 						     sizeof(avmb1_carddef))))
 				return -EFAULT;
 			cdef.cardtype = AVM_CARDTYPE_B1;
+			cdef.cardnr = 0;
 		} else {
 			if ((retval = copy_from_user(&cdef, data,
 						     sizeof(avmb1_extcarddef))))

From c2b341a620018d4eaeb0e85c16274ac4e5f153d4 Mon Sep 17 00:00:00 2001
From: Jonas Jensen <jonas.jensen@gmail.com>
Date: Tue, 28 Mar 2017 12:12:38 +0200
Subject: [PATCH 088/410] net: moxa: fix TX overrun memory leak

moxart_mac_start_xmit() doesn't care where tx_tail is, tx_head can
catch and pass tx_tail, which is bad because moxart_tx_finished()
isn't guaranteed to catch up on freeing resources from tx_tail.

Add a check in moxart_mac_start_xmit() stopping the queue at the
end of the circular buffer. Also add a check in moxart_tx_finished()
waking the queue if the buffer has TX_WAKE_THRESHOLD or more
free descriptors.

While we're at it, move spin_lock_irq() to happen before our
descriptor pointer is assigned in moxart_mac_start_xmit().

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=99451

Signed-off-by: Jonas Jensen <jonas.jensen@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/moxa/moxart_ether.c | 20 ++++++++++++++++++--
 drivers/net/ethernet/moxa/moxart_ether.h |  1 +
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
index 06c9f4100cb9..6ad44be08b33 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c
@@ -25,6 +25,7 @@
 #include <linux/of_irq.h>
 #include <linux/crc32.h>
 #include <linux/crc32c.h>
+#include <linux/circ_buf.h>
 
 #include "moxart_ether.h"
 
@@ -278,6 +279,13 @@ rx_next:
 	return rx;
 }
 
+static int moxart_tx_queue_space(struct net_device *ndev)
+{
+	struct moxart_mac_priv_t *priv = netdev_priv(ndev);
+
+	return CIRC_SPACE(priv->tx_head, priv->tx_tail, TX_DESC_NUM);
+}
+
 static void moxart_tx_finished(struct net_device *ndev)
 {
 	struct moxart_mac_priv_t *priv = netdev_priv(ndev);
@@ -297,6 +305,9 @@ static void moxart_tx_finished(struct net_device *ndev)
 		tx_tail = TX_NEXT(tx_tail);
 	}
 	priv->tx_tail = tx_tail;
+	if (netif_queue_stopped(ndev) &&
+	    moxart_tx_queue_space(ndev) >= TX_WAKE_THRESHOLD)
+		netif_wake_queue(ndev);
 }
 
 static irqreturn_t moxart_mac_interrupt(int irq, void *dev_id)
@@ -324,13 +335,18 @@ static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	struct moxart_mac_priv_t *priv = netdev_priv(ndev);
 	void *desc;
 	unsigned int len;
-	unsigned int tx_head = priv->tx_head;
+	unsigned int tx_head;
 	u32 txdes1;
 	int ret = NETDEV_TX_BUSY;
 
+	spin_lock_irq(&priv->txlock);
+
+	tx_head = priv->tx_head;
 	desc = priv->tx_desc_base + (TX_REG_DESC_SIZE * tx_head);
 
-	spin_lock_irq(&priv->txlock);
+	if (moxart_tx_queue_space(ndev) == 1)
+		netif_stop_queue(ndev);
+
 	if (moxart_desc_read(desc + TX_REG_OFFSET_DESC0) & TX_DESC0_DMA_OWN) {
 		net_dbg_ratelimited("no TX space for packet\n");
 		priv->stats.tx_dropped++;
diff --git a/drivers/net/ethernet/moxa/moxart_ether.h b/drivers/net/ethernet/moxa/moxart_ether.h
index 93a9563ac7c6..afc32ec998c0 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.h
+++ b/drivers/net/ethernet/moxa/moxart_ether.h
@@ -59,6 +59,7 @@
 #define TX_NEXT(N)		(((N) + 1) & (TX_DESC_NUM_MASK))
 #define TX_BUF_SIZE		1600
 #define TX_BUF_SIZE_MAX		(TX_DESC1_BUF_SIZE_MASK+1)
+#define TX_WAKE_THRESHOLD	16
 
 #define RX_DESC_NUM		64
 #define RX_DESC_NUM_MASK	(RX_DESC_NUM-1)

From e497ec680c4cd51e76bfcdd49363d9ab8d32a757 Mon Sep 17 00:00:00 2001
From: Talat Batheesh <talatb@mellanox.com>
Date: Tue, 28 Mar 2017 16:13:41 +0300
Subject: [PATCH 089/410] net/mlx5: Avoid dereferencing uninitialized pointer

In NETDEV_CHANGEUPPER event the upper_info field is valid
only when linking is true. Otherwise it should be ignored.

Fixes: 7907f23adc18 (net/mlx5: Implement RoCE LAG feature)
Signed-off-by: Talat Batheesh <talatb@mellanox.com>
Reviewed-by: Aviv Heller <avivh@mellanox.com>
Reviewed-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 55957246c0e8..b5d5519542e8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -294,7 +294,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
 					 struct netdev_notifier_changeupper_info *info)
 {
 	struct net_device *upper = info->upper_dev, *ndev_tmp;
-	struct netdev_lag_upper_info *lag_upper_info;
+	struct netdev_lag_upper_info *lag_upper_info = NULL;
 	bool is_bonded;
 	int bond_status = 0;
 	int num_slaves = 0;
@@ -303,7 +303,8 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
 	if (!netif_is_lag_master(upper))
 		return 0;
 
-	lag_upper_info = info->upper_info;
+	if (info->linking)
+		lag_upper_info = info->upper_info;
 
 	/* The event may still be of interest if the slave does not belong to
 	 * us, but is enslaved to a master which has one or more of our netdevs

From 16b8b6de32572207abeb4dfb74ab7bd8409a5690 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 28 Mar 2017 16:11:18 +0200
Subject: [PATCH 090/410] rocker: fix Wmaybe-uninitialized false-positive

gcc-7 reports a warning that earlier versions did not have:

drivers/net/ethernet/rocker/rocker_ofdpa.c: In function 'ofdpa_port_stp_update':
arch/x86/include/asm/string_32.h:79:22: error: '*((void *)&prev_ctrls+4)' may be used uninitialized in this function [-Werror=maybe-uninitialized]
   *((short *)to + 2) = *((short *)from + 2);
   ~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~
drivers/net/ethernet/rocker/rocker_ofdpa.c:2218:7: note: '*((void *)&prev_ctrls+4)' was declared here

This is clearly a variation of the warning about 'prev_state' that
was shut up using uninitialized_var().

We can slightly simplify the code and get rid of the warning by unconditionally
saving the prev_state and prev_ctrls variables. The inlined memcpy is not
particularly expensive here, as it just has to read five bytes from one or
two cache lines.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/rocker/rocker_ofdpa.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index 7cd76b6b5cb9..2ae852454780 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -2216,18 +2216,15 @@ static int ofdpa_port_stp_update(struct ofdpa_port *ofdpa_port,
 {
 	bool want[OFDPA_CTRL_MAX] = { 0, };
 	bool prev_ctrls[OFDPA_CTRL_MAX];
-	u8 uninitialized_var(prev_state);
+	u8 prev_state;
 	int err;
 	int i;
 
-	if (switchdev_trans_ph_prepare(trans)) {
-		memcpy(prev_ctrls, ofdpa_port->ctrls, sizeof(prev_ctrls));
-		prev_state = ofdpa_port->stp_state;
-	}
-
-	if (ofdpa_port->stp_state == state)
+	prev_state = ofdpa_port->stp_state;
+	if (prev_state == state)
 		return 0;
 
+	memcpy(prev_ctrls, ofdpa_port->ctrls, sizeof(prev_ctrls));
 	ofdpa_port->stp_state = state;
 
 	switch (state) {

From b768b16de58d5e0b1d7c3f936825b25327ced20c Mon Sep 17 00:00:00 2001
From: Jarno Rajahalme <jarno@ovn.org>
Date: Tue, 28 Mar 2017 11:25:26 -0700
Subject: [PATCH 091/410] openvswitch: Fix refcount leak on force commit.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The reference count held for skb needs to be released when the skb's
nfct pointer is cleared regardless of if nf_ct_delete() is called or
not.

Failing to release the skb's reference cound led to deferred conntrack
cleanup spinning forever within nf_conntrack_cleanup_net_list() when
cleaning up a network namespace:

   kworker/u16:0-19025 [004] 45981067.173642: sched_switch: kworker/u16:0:19025 [120] R ==> rcu_preempt:7 [120]
   kworker/u16:0-19025 [004] 45981067.173651: kernel_stack: <stack trace>
=> ___preempt_schedule (ffffffffa001ed36)
=> _raw_spin_unlock_bh (ffffffffa0713290)
=> nf_ct_iterate_cleanup (ffffffffc00a4454)
=> nf_conntrack_cleanup_net_list (ffffffffc00a5e1e)
=> nf_conntrack_pernet_exit (ffffffffc00a63dd)
=> ops_exit_list.isra.1 (ffffffffa06075f3)
=> cleanup_net (ffffffffa0607df0)
=> process_one_work (ffffffffa0084c31)
=> worker_thread (ffffffffa008592b)
=> kthread (ffffffffa008bee2)
=> ret_from_fork (ffffffffa071b67c)

Fixes: dd41d33f0b03 ("openvswitch: Add force commit.")
Reported-by: Yang Song <yangsong@vmware.com>
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/conntrack.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index e0a87776a010..7b2c2fce408a 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -643,8 +643,8 @@ static bool skb_nfct_cached(struct net *net,
 		 */
 		if (nf_ct_is_confirmed(ct))
 			nf_ct_delete(ct, 0, 0);
-		else
-			nf_conntrack_put(&ct->ct_general);
+
+		nf_conntrack_put(&ct->ct_general);
 		nf_ct_set(skb, NULL, 0);
 		return false;
 	}

From b3ef5520c1eabb56064474043c7c55a1a65b8708 Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Tue, 28 Mar 2017 09:11:31 +0100
Subject: [PATCH 092/410] cfg80211: check rdev resume callback only for
 registered wiphy

We got the following use-after-free KASAN report:

 BUG: KASAN: use-after-free in wiphy_resume+0x591/0x5a0 [cfg80211]
	 at addr ffff8803fc244090
 Read of size 8 by task kworker/u16:24/2587
 CPU: 6 PID: 2587 Comm: kworker/u16:24 Tainted: G    B 4.9.13-debug+
 Hardware name: Dell Inc. XPS 15 9550/0N7TVV, BIOS 1.2.19 12/22/2016
 Workqueue: events_unbound async_run_entry_fn
  ffff880425d4f9d8 ffffffffaeedb541 ffff88042b80ef00 ffff8803fc244088
  ffff880425d4fa00 ffffffffae84d7a1 ffff880425d4fa98 ffff8803fc244080
  ffff88042b80ef00 ffff880425d4fa88 ffffffffae84da3a ffffffffc141f7d9
 Call Trace:
  [<ffffffffaeedb541>] dump_stack+0x85/0xc4
  [<ffffffffae84d7a1>] kasan_object_err+0x21/0x70
  [<ffffffffae84da3a>] kasan_report_error+0x1fa/0x500
  [<ffffffffc141f7d9>] ? cfg80211_bss_age+0x39/0xc0 [cfg80211]
  [<ffffffffc141f83a>] ? cfg80211_bss_age+0x9a/0xc0 [cfg80211]
  [<ffffffffae48d46d>] ? trace_hardirqs_on+0xd/0x10
  [<ffffffffc13fb1c0>] ? wiphy_suspend+0xc70/0xc70 [cfg80211]
  [<ffffffffae84def1>] __asan_report_load8_noabort+0x61/0x70
  [<ffffffffc13fb100>] ? wiphy_suspend+0xbb0/0xc70 [cfg80211]
  [<ffffffffc13fb751>] ? wiphy_resume+0x591/0x5a0 [cfg80211]
  [<ffffffffc13fb751>] wiphy_resume+0x591/0x5a0 [cfg80211]
  [<ffffffffc13fb1c0>] ? wiphy_suspend+0xc70/0xc70 [cfg80211]
  [<ffffffffaf3b206e>] dpm_run_callback+0x6e/0x4f0
  [<ffffffffaf3b31b2>] device_resume+0x1c2/0x670
  [<ffffffffaf3b367d>] async_resume+0x1d/0x50
  [<ffffffffae3ee84e>] async_run_entry_fn+0xfe/0x610
  [<ffffffffae3d0666>] process_one_work+0x716/0x1a50
  [<ffffffffae3d05c9>] ? process_one_work+0x679/0x1a50
  [<ffffffffafdd7b6d>] ? _raw_spin_unlock_irq+0x3d/0x60
  [<ffffffffae3cff50>] ? pwq_dec_nr_in_flight+0x2b0/0x2b0
  [<ffffffffae3d1a80>] worker_thread+0xe0/0x1460
  [<ffffffffae3d19a0>] ? process_one_work+0x1a50/0x1a50
  [<ffffffffae3e54c2>] kthread+0x222/0x2e0
  [<ffffffffae3e52a0>] ? kthread_park+0x80/0x80
  [<ffffffffae3e52a0>] ? kthread_park+0x80/0x80
  [<ffffffffae3e52a0>] ? kthread_park+0x80/0x80
  [<ffffffffafdd86aa>] ret_from_fork+0x2a/0x40
 Object at ffff8803fc244088, in cache kmalloc-1024 size: 1024
 Allocated:
 PID = 71
  save_stack_trace+0x1b/0x20
  save_stack+0x46/0xd0
  kasan_kmalloc+0xad/0xe0
  kasan_slab_alloc+0x12/0x20
  __kmalloc_track_caller+0x134/0x360
  kmemdup+0x20/0x50
  brcmf_cfg80211_attach+0x10b/0x3a90 [brcmfmac]
  brcmf_bus_start+0x19a/0x9a0 [brcmfmac]
  brcmf_pcie_setup+0x1f1a/0x3680 [brcmfmac]
  brcmf_fw_request_nvram_done+0x44c/0x11b0 [brcmfmac]
  request_firmware_work_func+0x135/0x280
  process_one_work+0x716/0x1a50
  worker_thread+0xe0/0x1460
  kthread+0x222/0x2e0
  ret_from_fork+0x2a/0x40
 Freed:
 PID = 2568
  save_stack_trace+0x1b/0x20
  save_stack+0x46/0xd0
  kasan_slab_free+0x71/0xb0
  kfree+0xe8/0x2e0
  brcmf_cfg80211_detach+0x62/0xf0 [brcmfmac]
  brcmf_detach+0x14a/0x2b0 [brcmfmac]
  brcmf_pcie_remove+0x140/0x5d0 [brcmfmac]
  brcmf_pcie_pm_leave_D3+0x198/0x2e0 [brcmfmac]
  pci_pm_resume+0x186/0x220
  dpm_run_callback+0x6e/0x4f0
  device_resume+0x1c2/0x670
  async_resume+0x1d/0x50
  async_run_entry_fn+0xfe/0x610
  process_one_work+0x716/0x1a50
  worker_thread+0xe0/0x1460
  kthread+0x222/0x2e0
  ret_from_fork+0x2a/0x40
 Memory state around the buggy address:
  ffff8803fc243f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
  ffff8803fc244000: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 >ffff8803fc244080: fc fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                          ^
  ffff8803fc244100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
  ffff8803fc244180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb

What is happening is that brcmf_pcie_resume() detects a device that
is no longer responsive and it decides to unbind resulting in a
wiphy_unregister() and wiphy_free() call. Now the wiphy instance
remains allocated, because PM needs to call wiphy_resume() for it.
However, brcmfmac already does a kfree() for the struct
cfg80211_registered_device::ops field. Change the checks in
wiphy_resume() to only access the struct cfg80211_registered_device::ops
if the wiphy instance is still registered at this time.

Cc: stable@vger.kernel.org # 4.10.x, 4.9.x
Reported-by: Daniel J Blueman <daniel@quora.org>
Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/sysfs.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 16b6b5988be9..570a2b67ca10 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -132,12 +132,10 @@ static int wiphy_resume(struct device *dev)
 	/* Age scan results with time spent in suspend */
 	cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at);
 
-	if (rdev->ops->resume) {
-		rtnl_lock();
-		if (rdev->wiphy.registered)
-			ret = rdev_resume(rdev);
-		rtnl_unlock();
-	}
+	rtnl_lock();
+	if (rdev->wiphy.registered && rdev->ops->resume)
+		ret = rdev_resume(rdev);
+	rtnl_unlock();
 
 	return ret;
 }

From 77c1c03c5b8ef28e55bb0aff29b1e006037ca645 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Tue, 28 Mar 2017 22:59:25 +0800
Subject: [PATCH 093/410] netfilter: nfnetlink_queue: fix secctx memory leak

We must call security_release_secctx to free the memory returned by
security_secid_to_secctx, otherwise memory may be leaked forever.

Fixes: ef493bd930ae ("netfilter: nfnetlink_queue: add security context information")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_queue.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 3ee0b8a000a4..933509ebf3d3 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -443,7 +443,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 	skb = alloc_skb(size, GFP_ATOMIC);
 	if (!skb) {
 		skb_tx_error(entskb);
-		return NULL;
+		goto nlmsg_failure;
 	}
 
 	nlh = nlmsg_put(skb, 0, 0,
@@ -452,7 +452,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 	if (!nlh) {
 		skb_tx_error(entskb);
 		kfree_skb(skb);
-		return NULL;
+		goto nlmsg_failure;
 	}
 	nfmsg = nlmsg_data(nlh);
 	nfmsg->nfgen_family = entry->state.pf;
@@ -598,12 +598,17 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 	}
 
 	nlh->nlmsg_len = skb->len;
+	if (seclen)
+		security_release_secctx(secdata, seclen);
 	return skb;
 
 nla_put_failure:
 	skb_tx_error(entskb);
 	kfree_skb(skb);
 	net_err_ratelimited("nf_queue: error creating packet message\n");
+nlmsg_failure:
+	if (seclen)
+		security_release_secctx(secdata, seclen);
 	return NULL;
 }
 

From 9e1764309f577a88a0d5250fea6a080a6ad43556 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sat, 25 Mar 2017 11:32:43 +0000
Subject: [PATCH 094/410] drm/i915: Align "unfenced" tiled access on gen2,
 early gen3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Old devices have quite severe restrictions for using fences, and unlike
more recent device (anything from Pineview onwards) we need to enforce
those restrictions even for unfenced tiled access from the render
pipeline.

Fixes: 944397f04f24 ("drm/i915: Store required fence size/alignment for GGTT vma")
Reported-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <drm-intel-fixes@lists.freedesktop.org> # v4.11-rc1+
Link: http://patchwork.freedesktop.org/patch/msgid/20170325113243.16438-1-chris@chris-wilson.co.uk
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Tested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
(cherry picked from commit f4ce766f28cd0efa0cb4d869a84905d573ef7e70)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            | 1 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +++-
 drivers/gpu/drm/i915/i915_pci.c            | 5 +++++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1e53c31b6826..46fcd8b7080a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -806,6 +806,7 @@ struct intel_csr {
 	func(has_resource_streamer); \
 	func(has_runtime_pm); \
 	func(has_snoop); \
+	func(unfenced_needs_alignment); \
 	func(cursor_needs_physical); \
 	func(hws_needs_physical); \
 	func(overlay_needs_physical); \
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 30e0675fd7da..15a15d00a6bf 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -888,6 +888,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
 	struct list_head ordered_vmas;
 	struct list_head pinned_vmas;
 	bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
+	bool needs_unfenced_map = INTEL_INFO(engine->i915)->unfenced_needs_alignment;
 	int retry;
 
 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
@@ -908,7 +909,8 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
 		if (!has_fenced_gpu_access)
 			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
 		need_fence =
-			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
+			(entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
+			 needs_unfenced_map) &&
 			i915_gem_object_is_tiled(obj);
 		need_mappable = need_fence || need_reloc_mappable(vma);
 
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index ecb487b5356f..9bbbd4e83e3c 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -60,6 +60,7 @@
 	.has_overlay = 1, .overlay_needs_physical = 1, \
 	.has_gmch_display = 1, \
 	.hws_needs_physical = 1, \
+	.unfenced_needs_alignment = 1, \
 	.ring_mask = RENDER_RING, \
 	GEN_DEFAULT_PIPEOFFSETS, \
 	CURSOR_OFFSETS
@@ -101,6 +102,7 @@ static const struct intel_device_info intel_i915g_info = {
 	.platform = INTEL_I915G, .cursor_needs_physical = 1,
 	.has_overlay = 1, .overlay_needs_physical = 1,
 	.hws_needs_physical = 1,
+	.unfenced_needs_alignment = 1,
 };
 
 static const struct intel_device_info intel_i915gm_info = {
@@ -112,6 +114,7 @@ static const struct intel_device_info intel_i915gm_info = {
 	.supports_tv = 1,
 	.has_fbc = 1,
 	.hws_needs_physical = 1,
+	.unfenced_needs_alignment = 1,
 };
 
 static const struct intel_device_info intel_i945g_info = {
@@ -120,6 +123,7 @@ static const struct intel_device_info intel_i945g_info = {
 	.has_hotplug = 1, .cursor_needs_physical = 1,
 	.has_overlay = 1, .overlay_needs_physical = 1,
 	.hws_needs_physical = 1,
+	.unfenced_needs_alignment = 1,
 };
 
 static const struct intel_device_info intel_i945gm_info = {
@@ -130,6 +134,7 @@ static const struct intel_device_info intel_i945gm_info = {
 	.supports_tv = 1,
 	.has_fbc = 1,
 	.hws_needs_physical = 1,
+	.unfenced_needs_alignment = 1,
 };
 
 static const struct intel_device_info intel_g33_info = {

From 4e5f713ffc202c49a4374897cb0d2b218b391ff7 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 27 Mar 2017 21:34:59 +0100
Subject: [PATCH 095/410] drm/i915/perf: destroy stream on sample_flags
 mismatch

If we were to ever encounter a sample_flags mismatch we need to ensure
we destroy the stream when we bail.

Fixes: d79651522e89 ("drm/i915: Enable i915 perf stream for Haswell OA unit")
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170327203459.18398-1-matthew.auld@intel.com
(cherry picked from commit 22f880ca8246c6c80c4f48731c6a7d5d15042f56)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index a1b7eec58be2..f8fcb317042e 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1705,7 +1705,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 	 */
 	if (WARN_ON(stream->sample_flags != props->sample_flags)) {
 		ret = -ENODEV;
-		goto err_alloc;
+		goto err_flags;
 	}
 
 	list_add(&stream->link, &dev_priv->perf.streams);
@@ -1728,6 +1728,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 
 err_open:
 	list_del(&stream->link);
+err_flags:
 	if (stream->ops->destroy)
 		stream->ops->destroy(stream);
 err_alloc:

From aa62acfd63e7367872291c15290cb9c29d140926 Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Mon, 27 Mar 2017 21:32:36 +0100
Subject: [PATCH 096/410] drm/i915/perf: remove user triggerable warn

Don't throw a warning if we are given an invalid property id. While
here let's also bring back Robert' original idea of catching unhandled
enumeration values at compile time.

Fixes: eec688e1420d ("drm/i915: Add i915 perf infrastructure")
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Robert Bragg <robert@sixbynine.org>
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170327203236.18276-1-matthew.auld@intel.com
(cherry picked from commit 0a309f9e3dfaa4f5db0bf1b0cab54571744b491a)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index f8fcb317042e..70964ca9251e 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1794,6 +1794,11 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 		if (ret)
 			return ret;
 
+		if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) {
+			DRM_DEBUG("Unknown i915 perf property ID\n");
+			return -EINVAL;
+		}
+
 		switch ((enum drm_i915_perf_property_id)id) {
 		case DRM_I915_PERF_PROP_CTX_HANDLE:
 			props->single_context = 1;
@@ -1863,9 +1868,8 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 			props->oa_periodic = true;
 			props->oa_period_exponent = value;
 			break;
-		default:
+		case DRM_I915_PERF_PROP_MAX:
 			MISSING_CASE(id);
-			DRM_DEBUG("Unknown i915 perf property ID\n");
 			return -EINVAL;
 		}
 

From 7d65f82954dadbbe7b6e1aec7e07ad17bc6d958b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 29 Mar 2017 14:15:24 +0200
Subject: [PATCH 097/410] mac80211: unconditionally start new netdev queues
 with iTXQ support

When internal mac80211 TXQs aren't supported, netdev queues must
always started out started even when driver queues are stopped
while the interface is added. This is necessary because with the
internal TXQ support netdev queues are never stopped and packet
scheduling/dropping is done in mac80211.

Cc: stable@vger.kernel.org # 4.9+
Fixes: 80a83cfc434b1 ("mac80211: skip netdev queue control with software queuing")
Reported-and-tested-by: Sven Eckelmann <sven.eckelmann@openmesh.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/iface.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 40813dd3301c..5bb0c5012819 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -718,7 +718,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 	ieee80211_recalc_ps(local);
 
 	if (sdata->vif.type == NL80211_IFTYPE_MONITOR ||
-	    sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
+	    sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
+	    local->ops->wake_tx_queue) {
 		/* XXX: for AP_VLAN, actually track AP queues */
 		netif_tx_start_all_queues(dev);
 	} else if (dev) {

From dd68f2ba0720e76c3a5bfa3f639c546f926792f5 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 29 Mar 2017 13:13:15 +0100
Subject: [PATCH 098/410] drm/i915/execlists: Wrap tail pointer after reset
 tweaking

If the request->wa_tail is 0 (because it landed exactly on the end of
the ringbuffer), when we reconstruct request->tail following a reset we
fill in an illegal value (-8 or 0x001ffff8). As a result, RING_HEAD is
never able to catch up with RING_TAIL and the GPU spins endlessly. If
the ring contains a couple of breadcrumbs, even our hangcheck is unable
to catch the busy-looping as the ACTHD and seqno continually advance.

v2: Move the wrap into a common intel_ring_wrap().

Fixes: a3aabe86a340 ("drm/i915/execlists: Reinitialise context image after GPU hang")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: <stable@vger.kernel.org> # v4.10+
Link: http://patchwork.freedesktop.org/patch/msgid/20170327130009.4678-1-chris@chris-wilson.co.uk
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
(cherry picked from commit 450362d3fe866b14304f309b5fffba0c33fbfbc3)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170329121315.1290-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/intel_lrc.c        | 4 +++-
 drivers/gpu/drm/i915/intel_ringbuffer.h | 8 +++++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 471af3b480ad..91555d4e9129 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1440,7 +1440,9 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	GEM_BUG_ON(request->ctx != port[0].request->ctx);
 
 	/* Reset WaIdleLiteRestore:bdw,skl as well */
-	request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32);
+	request->tail =
+		intel_ring_wrap(request->ring,
+				request->wa_tail - WA_TAIL_DWORDS*sizeof(u32));
 }
 
 static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 13dccb18cd43..8cb2078c5bfc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -521,11 +521,17 @@ static inline void intel_ring_advance(struct intel_ring *ring)
 	 */
 }
 
+static inline u32
+intel_ring_wrap(const struct intel_ring *ring, u32 pos)
+{
+	return pos & (ring->size - 1);
+}
+
 static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr)
 {
 	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
 	u32 offset = addr - ring->vaddr;
-	return offset & (ring->size - 1);
+	return intel_ring_wrap(ring, offset);
 }
 
 int __intel_ring_space(int head, int tail, int size);

From 94d7ee0baa8b764cf64ad91ed69464c1a6a0066b Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Wed, 29 Mar 2017 08:44:59 +0200
Subject: [PATCH 099/410] l2tp: hold tunnel socket when handling control frames
 in l2tp_ip and l2tp_ip6

The code following l2tp_tunnel_find() expects that a new reference is
held on sk. Either sk_receive_skb() or the discard_put error path will
drop a reference from the tunnel's socket.

This issue exists in both l2tp_ip and l2tp_ip6.

Fixes: a3c18422a4b4 ("l2tp: hold socket before dropping lock in l2tp_ip{, 6}_recv()")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip.c  | 5 +++--
 net/l2tp/l2tp_ip6.c | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index d25038cfd64e..7208fbe5856b 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -178,9 +178,10 @@ pass_up:
 
 	tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
 	tunnel = l2tp_tunnel_find(net, tunnel_id);
-	if (tunnel != NULL)
+	if (tunnel) {
 		sk = tunnel->sock;
-	else {
+		sock_hold(sk);
+	} else {
 		struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
 
 		read_lock_bh(&l2tp_ip_lock);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index a4abcbc4c09a..516d7ce24ba7 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -191,9 +191,10 @@ pass_up:
 
 	tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
 	tunnel = l2tp_tunnel_find(net, tunnel_id);
-	if (tunnel != NULL)
+	if (tunnel) {
 		sk = tunnel->sock;
-	else {
+		sock_hold(sk);
+	} else {
 		struct ipv6hdr *iph = ipv6_hdr(skb);
 
 		read_lock_bh(&l2tp_ip6_lock);

From e91793bb615cf6cdd59c0b6749fe173687bb0947 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Wed, 29 Mar 2017 08:45:29 +0200
Subject: [PATCH 100/410] l2tp: purge socket queues in the .destruct() callback

The Rx path may grab the socket right before pppol2tp_release(), but
nothing guarantees that it will enqueue packets before
skb_queue_purge(). Therefore, the socket can be destroyed without its
queues fully purged.

Fix this by purging queues in pppol2tp_session_destruct() where we're
guaranteed nothing is still referencing the socket.

Fixes: 9e9cb6221aa7 ("l2tp: fix userspace reception on plain L2TP sockets")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ppp.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 36cc56fd0418..123b6a2411a0 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -450,6 +450,10 @@ static void pppol2tp_session_close(struct l2tp_session *session)
 static void pppol2tp_session_destruct(struct sock *sk)
 {
 	struct l2tp_session *session = sk->sk_user_data;
+
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_write_queue);
+
 	if (session) {
 		sk->sk_user_data = NULL;
 		BUG_ON(session->magic != L2TP_SESSION_MAGIC);
@@ -488,9 +492,6 @@ static int pppol2tp_release(struct socket *sock)
 		l2tp_session_queue_purge(session);
 		sock_put(sk);
 	}
-	skb_queue_purge(&sk->sk_receive_queue);
-	skb_queue_purge(&sk->sk_write_queue);
-
 	release_sock(sk);
 
 	/* This will delete the session context via

From 916a008b4b8ecc02fbd035cfb133773dba1ff3d7 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Wed, 29 Mar 2017 17:12:47 +0100
Subject: [PATCH 101/410] ARM: dma-mapping: disallow dma_get_sgtable() for
 non-kernel managed memory

dma_get_sgtable() tries to create a scatterlist table containing valid
struct page pointers for the coherent memory allocation passed in to it.

However, memory can be declared via dma_declare_coherent_memory(), or
via other reservation schemes which means that coherent memory is not
guaranteed to be backed by struct pages.  In such cases, the resulting
scatterlist table contains pointers to invalid pages, which causes
kernel oops later.

This patch adds detection of such memory, and refuses to create a
scatterlist table for such memory.

Reported-by: Shuah Khan <shuahkhan@gmail.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/mm/dma-mapping.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 63eabb06f9f1..475811f5383a 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -935,13 +935,31 @@ static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_add
 	__arm_dma_free(dev, size, cpu_addr, handle, attrs, true);
 }
 
+/*
+ * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
+ * that the intention is to allow exporting memory allocated via the
+ * coherent DMA APIs through the dma_buf API, which only accepts a
+ * scattertable.  This presents a couple of problems:
+ * 1. Not all memory allocated via the coherent DMA APIs is backed by
+ *    a struct page
+ * 2. Passing coherent DMA memory into the streaming APIs is not allowed
+ *    as we will try to flush the memory through a different alias to that
+ *    actually being used (and the flushes are redundant.)
+ */
 int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
 		 void *cpu_addr, dma_addr_t handle, size_t size,
 		 unsigned long attrs)
 {
-	struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
+	unsigned long pfn = dma_to_pfn(dev, handle);
+	struct page *page;
 	int ret;
 
+	/* If the PFN is not valid, we do not have a struct page */
+	if (!pfn_valid(pfn))
+		return -ENXIO;
+
+	page = pfn_to_page(pfn);
+
 	ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
 	if (unlikely(ret))
 		return ret;

From 3cc070c1c81948b33ebe2ea68cd39307ce2b312d Mon Sep 17 00:00:00 2001
From: afzal mohammed <afzal.mohd.ma@gmail.com>
Date: Thu, 23 Mar 2017 13:49:32 +0100
Subject: [PATCH 102/410] ARM: 8665/1: nommu: access ID_PFR1 only if CPUID
 scheme

Greg upon trying to boot no-MMU Kernel on ARM926EJ reported boot
failure. He root caused it to ID_PFR1 access introduced by the
commit mentioned in the fixes tag below.

All CP15 processors need not have processor feature registers, only
for architectures defined by CPUID scheme would have it. Hence check
for it before accessing processor feature register, ID_PFR1.

Fixes: f8300a0b5de0 ("ARM: 8647/2: nommu: dynamic exception base address setting")
Reported-by: Greg Ungerer <gerg@uclinux.org>
Signed-off-by: afzal mohammed <afzal.mohd.ma@gmail.com>
Tested-by: Greg Ungerer <gerg@uclinux.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/mm/nommu.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
index 3b5c7aaf9c76..33a45bd96860 100644
--- a/arch/arm/mm/nommu.c
+++ b/arch/arm/mm/nommu.c
@@ -303,7 +303,10 @@ static inline void set_vbar(unsigned long val)
  */
 static inline bool security_extensions_enabled(void)
 {
-	return !!cpuid_feature_extract(CPUID_EXT_PFR1, 4);
+	/* Check CPUID Identification Scheme before ID_PFR1 read */
+	if ((read_cpuid_id() & 0x000f0000) == 0x000f0000)
+		return !!cpuid_feature_extract(CPUID_EXT_PFR1, 4);
+	return 0;
 }
 
 static unsigned long __init setup_vectors_base(void)

From 2247925f0942dc4e7c09b1cde45ca18461d94c5f Mon Sep 17 00:00:00 2001
From: Sankar Patchineelam <sankar.patchineelam@broadcom.com>
Date: Tue, 28 Mar 2017 19:47:29 -0400
Subject: [PATCH 103/410] bnxt_en: Fix NULL pointer dereference in reopen
 failure path

Net device reset can fail when the h/w or f/w is in a bad state.
Subsequent netdevice open fails in bnxt_hwrm_stat_ctx_alloc().
The cleanup invokes bnxt_hwrm_resource_free() which inturn
calls bnxt_disable_int().  In this routine, the code segment

if (ring->fw_ring_id != INVALID_HW_RING_ID)
   BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons);

results in NULL pointer dereference as cpr->cp_doorbell is not yet
initialized, and fw_ring_id is zero.

The fix is to initialize cpr fw_ring_id to INVALID_HW_RING_ID before
bnxt_init_chip() is invoked.

Signed-off-by: Sankar Patchineelam <sankar.patchineelam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 32de4589d16a..7ee772483f26 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2455,6 +2455,18 @@ static int bnxt_init_one_rx_ring(struct bnxt *bp, int ring_nr)
 	return 0;
 }
 
+static void bnxt_init_cp_rings(struct bnxt *bp)
+{
+	int i;
+
+	for (i = 0; i < bp->cp_nr_rings; i++) {
+		struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring;
+		struct bnxt_ring_struct *ring = &cpr->cp_ring_struct;
+
+		ring->fw_ring_id = INVALID_HW_RING_ID;
+	}
+}
+
 static int bnxt_init_rx_rings(struct bnxt *bp)
 {
 	int i, rc = 0;
@@ -5006,6 +5018,7 @@ static int bnxt_shutdown_nic(struct bnxt *bp, bool irq_re_init)
 
 static int bnxt_init_nic(struct bnxt *bp, bool irq_re_init)
 {
+	bnxt_init_cp_rings(bp);
 	bnxt_init_rx_rings(bp);
 	bnxt_init_tx_rings(bp);
 	bnxt_init_ring_grps(bp, irq_re_init);

From 23e12c893489ed12ecfccbf866fc62af1bead4b0 Mon Sep 17 00:00:00 2001
From: Sankar Patchineelam <sankar.patchineelam@broadcom.com>
Date: Tue, 28 Mar 2017 19:47:30 -0400
Subject: [PATCH 104/410] bnxt_en: Correct the order of arguments to
 netdev_err() in bnxt_set_tpa()

Signed-off-by: Sankar Patchineelam <sankar.patchineelam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 7ee772483f26..6c8568780a6a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4744,7 +4744,7 @@ static int bnxt_set_tpa(struct bnxt *bp, bool set_tpa)
 		rc = bnxt_hwrm_vnic_set_tpa(bp, i, tpa_flags);
 		if (rc) {
 			netdev_err(bp->dev, "hwrm vnic set tpa failure rc for vnic %d: %x\n",
-				   rc, i);
+				   i, rc);
 			return rc;
 		}
 	}

From 3ed3a83e3f3871c57b18cef09b148e96921236ed Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Tue, 28 Mar 2017 19:47:31 -0400
Subject: [PATCH 105/410] bnxt_en: Fix DMA unmapping of the RX buffers in XDP
 mode during shutdown.

In bnxt_free_rx_skbs(), which is called to free up all RX buffers during
shutdown, we need to unmap the page if we are running in XDP mode.

Fixes: c61fb99cae51 ("bnxt_en: Add RX page mode support.")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 6c8568780a6a..1f1e54ba0ecb 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1983,20 +1983,25 @@ static void bnxt_free_rx_skbs(struct bnxt *bp)
 
 		for (j = 0; j < max_idx; j++) {
 			struct bnxt_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[j];
+			dma_addr_t mapping = rx_buf->mapping;
 			void *data = rx_buf->data;
 
 			if (!data)
 				continue;
 
-			dma_unmap_single(&pdev->dev, rx_buf->mapping,
-					 bp->rx_buf_use_size, bp->rx_dir);
-
 			rx_buf->data = NULL;
 
-			if (BNXT_RX_PAGE_MODE(bp))
+			if (BNXT_RX_PAGE_MODE(bp)) {
+				mapping -= bp->rx_dma_offset;
+				dma_unmap_page(&pdev->dev, mapping,
+					       PAGE_SIZE, bp->rx_dir);
 				__free_page(data);
-			else
+			} else {
+				dma_unmap_single(&pdev->dev, mapping,
+						 bp->rx_buf_use_size,
+						 bp->rx_dir);
 				kfree(data);
+			}
 		}
 
 		for (j = 0; j < max_agg_idx; j++) {

From 358e78b5f445c35f5b7f9241425fb499ee9fe3e2 Mon Sep 17 00:00:00 2001
From: Zakharov Vlad <Vladislav.Zakharov@synopsys.com>
Date: Wed, 29 Mar 2017 13:41:46 +0300
Subject: [PATCH 106/410] ezchip: nps_enet: check if napi has been completed

After a new NAPI_STATE_MISSED state was added to NAPI we can get into
this state and in such case we have to reschedule NAPI as some work is
still pending and we have to process it. napi_complete_done() function
returns false if we have to reschedule something (e.g. in case we were
in MISSED state) as current polling have not been completed yet.

nps_enet driver hasn't been verifying the return value of
napi_complete_done() and has been forcibly enabling interrupts. That is
not correct as we should not enable interrupts before we have processed
all scheduled work. As a result we were getting trapped in interrupt
hanlder chain as we had never been able to disabale ethernet
interrupts again.

So this patch makes nps_enet_poll() func verify return value of
napi_complete_done() and enable interrupts only in case all scheduled
work has been completed.

Signed-off-by: Vlad Zakharov <vzakhar@synopsys.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ezchip/nps_enet.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c
index 992ebe973d25..f819843e2bae 100644
--- a/drivers/net/ethernet/ezchip/nps_enet.c
+++ b/drivers/net/ethernet/ezchip/nps_enet.c
@@ -189,11 +189,9 @@ static int nps_enet_poll(struct napi_struct *napi, int budget)
 
 	nps_enet_tx_handler(ndev);
 	work_done = nps_enet_rx_handler(ndev);
-	if (work_done < budget) {
+	if ((work_done < budget) && napi_complete_done(napi, work_done)) {
 		u32 buf_int_enable_value = 0;
 
-		napi_complete_done(napi, work_done);
-
 		/* set tx_done and rx_rdy bits */
 		buf_int_enable_value |= NPS_ENET_ENABLE << RX_RDY_SHIFT;
 		buf_int_enable_value |= NPS_ENET_ENABLE << TX_DONE_SHIFT;

From b79c52aef3cdee903017c1e9834b53996d70010e Mon Sep 17 00:00:00 2001
From: Zhi Wang <zhi.a.wang@intel.com>
Date: Thu, 30 Mar 2017 01:48:39 +0800
Subject: [PATCH 107/410] drm/i915/gvt: Activate/de-activate vGPU in mdev ops.

This patch introduces two functions for activating/de-activating vGPU in
mdev ops.

A racing condition was found between virtual vblank emulation and KVGMT
mdev release path. V-blank emulation will emulate and inject V-blank
interrupt for every active vGPU with holding gvt->lock, while in mdev
release path, it will directly release hypervisor handle without changing
vGPU status or taking gvt->lock, so a kernel oops is encountered when
vblank emulation is injecting a interrupt with a invalid hypervisor
handle. (Reported by Terrence)

To solve this problem, we factor out vGPU activation/de-activation from
vGPU creation/destruction path and let KVMGT mdev release ops de-activate
the vGPU before release hypervisor handle. Once a vGPU is de-activated,
GVT-g will not emulate v-blank for it or touch the hypervisor handle.

Fixes: 659643f ("drm/i915/gvt/kvmgt: add vfio/mdev support to KVMGT")
Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gvt.c   |  2 ++
 drivers/gpu/drm/i915/gvt/gvt.h   |  5 ++-
 drivers/gpu/drm/i915/gvt/kvmgt.c |  4 +++
 drivers/gpu/drm/i915/gvt/vgpu.c  | 53 ++++++++++++++++++++++++++------
 4 files changed, 53 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c
index 3b9d59e457ba..ef3baa0c4754 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.c
+++ b/drivers/gpu/drm/i915/gvt/gvt.c
@@ -52,6 +52,8 @@ static const struct intel_gvt_ops intel_gvt_ops = {
 	.vgpu_create = intel_gvt_create_vgpu,
 	.vgpu_destroy = intel_gvt_destroy_vgpu,
 	.vgpu_reset = intel_gvt_reset_vgpu,
+	.vgpu_activate = intel_gvt_activate_vgpu,
+	.vgpu_deactivate = intel_gvt_deactivate_vgpu,
 };
 
 /**
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 23791920ced1..2387eacf74bb 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -382,7 +382,8 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu);
 void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
 				 unsigned int engine_mask);
 void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu);
-
+void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu);
+void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu);
 
 /* validating GM functions */
 #define vgpu_gmadr_is_aperture(vgpu, gmadr) \
@@ -449,6 +450,8 @@ struct intel_gvt_ops {
 				struct intel_vgpu_type *);
 	void (*vgpu_destroy)(struct intel_vgpu *);
 	void (*vgpu_reset)(struct intel_vgpu *);
+	void (*vgpu_activate)(struct intel_vgpu *);
+	void (*vgpu_deactivate)(struct intel_vgpu *);
 };
 
 
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index d641214578a7..9843d74056a8 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -544,6 +544,8 @@ static int intel_vgpu_open(struct mdev_device *mdev)
 	if (ret)
 		goto undo_group;
 
+	intel_gvt_ops->vgpu_activate(vgpu);
+
 	atomic_set(&vgpu->vdev.released, 0);
 	return ret;
 
@@ -569,6 +571,8 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
 	if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1))
 		return;
 
+	intel_gvt_ops->vgpu_deactivate(vgpu);
+
 	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
 					&vgpu->vdev.iommu_notifier);
 	WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 41cfa5ccae84..2f5792a1ce38 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -178,6 +178,47 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt)
 	}
 }
 
+/**
+ * intel_gvt_active_vgpu - activate a virtual GPU
+ * @vgpu: virtual GPU
+ *
+ * This function is called when user wants to activate a virtual GPU.
+ *
+ */
+void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu)
+{
+	mutex_lock(&vgpu->gvt->lock);
+	vgpu->active = true;
+	mutex_unlock(&vgpu->gvt->lock);
+}
+
+/**
+ * intel_gvt_deactive_vgpu - deactivate a virtual GPU
+ * @vgpu: virtual GPU
+ *
+ * This function is called when user wants to deactivate a virtual GPU.
+ * All virtual GPU runtime information will be destroyed.
+ *
+ */
+void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+
+	mutex_lock(&gvt->lock);
+
+	vgpu->active = false;
+
+	if (atomic_read(&vgpu->running_workload_num)) {
+		mutex_unlock(&gvt->lock);
+		intel_gvt_wait_vgpu_idle(vgpu);
+		mutex_lock(&gvt->lock);
+	}
+
+	intel_vgpu_stop_schedule(vgpu);
+
+	mutex_unlock(&gvt->lock);
+}
+
 /**
  * intel_gvt_destroy_vgpu - destroy a virtual GPU
  * @vgpu: virtual GPU
@@ -191,16 +232,9 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
 
 	mutex_lock(&gvt->lock);
 
-	vgpu->active = false;
+	WARN(vgpu->active, "vGPU is still active!\n");
+
 	idr_remove(&gvt->vgpu_idr, vgpu->id);
-
-	if (atomic_read(&vgpu->running_workload_num)) {
-		mutex_unlock(&gvt->lock);
-		intel_gvt_wait_vgpu_idle(vgpu);
-		mutex_lock(&gvt->lock);
-	}
-
-	intel_vgpu_stop_schedule(vgpu);
 	intel_vgpu_clean_sched_policy(vgpu);
 	intel_vgpu_clean_gvt_context(vgpu);
 	intel_vgpu_clean_execlist(vgpu);
@@ -277,7 +311,6 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
 	if (ret)
 		goto out_clean_shadow_ctx;
 
-	vgpu->active = true;
 	mutex_unlock(&gvt->lock);
 
 	return vgpu;

From d09c5373e8e4eaaa09233552cbf75dc4c4f21203 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 27 Mar 2017 09:48:04 +0200
Subject: [PATCH 108/410] s390/uaccess: get_user() should zero on failure
 (again)

Commit fd2d2b191fe7 ("s390: get_user() should zero on failure")
intended to fix s390's get_user() implementation which did not zero
the target operand if the read from user space faulted. Unfortunately
the patch has no effect: the corresponding inline assembly specifies
that the operand is only written to ("=") and the previous value is
discarded.

Therefore the compiler is free to and actually does omit the zero
initialization.

To fix this simply change the contraint modifier to "+", so the
compiler cannot omit the initialization anymore.

Fixes: c9ca78415ac1 ("s390/uaccess: provide inline variants of get_user/put_user")
Fixes: fd2d2b191fe7 ("s390: get_user() should zero on failure")
Cc: stable@vger.kernel.org
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/uaccess.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 136932ff4250..3ea1554d04b3 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -147,7 +147,7 @@ unsigned long __must_check __copy_to_user(void __user *to, const void *from,
 		"	jg	2b\n"				\
 		".popsection\n"					\
 		EX_TABLE(0b,3b) EX_TABLE(1b,3b)			\
-		: "=d" (__rc), "=Q" (*(to))			\
+		: "=d" (__rc), "+Q" (*(to))			\
 		: "d" (size), "Q" (*(from)),			\
 		  "d" (__reg0), "K" (-EFAULT)			\
 		: "cc");					\

From 740372b76e7966604e0f4dd0de13135513024f0d Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Mon, 20 Mar 2017 21:04:05 -0700
Subject: [PATCH 109/410] tcmu: Allow cmd_time_out to be set to zero (disabled)

The new cmd_time_out configfs attribute for TCMU is allowed to
be disabled, so go ahead and drop the tcmu_cmd_time_out_store()
check.

Reported-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index c6874c38a10b..6a17c78e4662 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1196,11 +1196,6 @@ static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *pag
 	if (ret < 0)
 		return ret;
 
-	if (!val) {
-		pr_err("Illegal value for cmd_time_out\n");
-		return -EINVAL;
-	}
-
 	udev->cmd_time_out = val * MSEC_PER_SEC;
 	return count;
 }

From afea03fcf3d5db8a968f4b97797b8b83ada0dba3 Mon Sep 17 00:00:00 2001
From: Manish Narani <manish.narani@xilinx.com>
Date: Mon, 20 Mar 2017 15:05:29 +0530
Subject: [PATCH 110/410] usb: gadget: Correct usb EP argument for BOT status
 request

This patch corrects the argument in usb_ep_free_request as it is
mistakenly set to ep_out. It should be ep_in for status request.

Signed-off-by: Manish Narani <mnarani@xilinx.com>
Acked-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/usb/gadget/function/f_tcm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c
index d2351139342f..a82e2bd5ea34 100644
--- a/drivers/usb/gadget/function/f_tcm.c
+++ b/drivers/usb/gadget/function/f_tcm.c
@@ -373,7 +373,7 @@ static void bot_cleanup_old_alt(struct f_uas *fu)
 	usb_ep_free_request(fu->ep_in, fu->bot_req_in);
 	usb_ep_free_request(fu->ep_out, fu->bot_req_out);
 	usb_ep_free_request(fu->ep_out, fu->cmd.req);
-	usb_ep_free_request(fu->ep_out, fu->bot_status.req);
+	usb_ep_free_request(fu->ep_in, fu->bot_status.req);
 
 	kfree(fu->cmd.buf);
 

From efb2ea770bb3b0f40007530bc8b0c22f36e1c5eb Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Thu, 23 Mar 2017 17:19:24 -0700
Subject: [PATCH 111/410] iscsi-target: Fix TMR reference leak during session
 shutdown

This patch fixes a iscsi-target specific TMR reference leak
during session shutdown, that could occur when a TMR was
quiesced before the hand-off back to iscsi-target code
via transport_cmd_check_stop_to_fabric().

The reference leak happens because iscsit_free_cmd() was
incorrectly skipping the final target_put_sess_cmd() for
TMRs when transport_generic_free_cmd() returned zero because
the se_cmd->cmd_kref did not reach zero, due to the missing
se_cmd assignment in original code.

The result was iscsi_cmd and it's associated se_cmd memory
would be freed once se_sess->sess_cmd_map where released,
but the associated se_tmr_req was leaked and remained part
of se_device->dev_tmr_list.

This bug would manfiest itself as kernel paging request
OOPsen in core_tmr_lun_reset(), when a left-over se_tmr_req
attempted to dereference it's se_cmd pointer that had
already been released during normal session shutdown.

To address this bug, go ahead and treat ISCSI_OP_SCSI_CMD
and ISCSI_OP_SCSI_TMFUNC the same when there is an extra
se_cmd->cmd_kref to drop in iscsit_free_cmd(), and use
op_scsi to signal __iscsit_free_cmd() when the former
needs to clear any further iscsi related I/O state.

Reported-by: Rob Millner <rlm@daterainc.com>
Cc: Rob Millner <rlm@daterainc.com>
Reported-by: Chu Yuan Lin <cyl@datera.io>
Cc: Chu Yuan Lin <cyl@datera.io>
Tested-by: Chu Yuan Lin <cyl@datera.io>
Cc: stable@vger.kernel.org # 3.10+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target_util.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 5041a9c8bdcb..b4640338f8d8 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -737,21 +737,23 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown)
 {
 	struct se_cmd *se_cmd = NULL;
 	int rc;
+	bool op_scsi = false;
 	/*
 	 * Determine if a struct se_cmd is associated with
 	 * this struct iscsi_cmd.
 	 */
 	switch (cmd->iscsi_opcode) {
 	case ISCSI_OP_SCSI_CMD:
-		se_cmd = &cmd->se_cmd;
-		__iscsit_free_cmd(cmd, true, shutdown);
+		op_scsi = true;
 		/*
 		 * Fallthrough
 		 */
 	case ISCSI_OP_SCSI_TMFUNC:
-		rc = transport_generic_free_cmd(&cmd->se_cmd, shutdown);
-		if (!rc && shutdown && se_cmd && se_cmd->se_sess) {
-			__iscsit_free_cmd(cmd, true, shutdown);
+		se_cmd = &cmd->se_cmd;
+		__iscsit_free_cmd(cmd, op_scsi, shutdown);
+		rc = transport_generic_free_cmd(se_cmd, shutdown);
+		if (!rc && shutdown && se_cmd->se_sess) {
+			__iscsit_free_cmd(cmd, op_scsi, shutdown);
 			target_put_sess_cmd(se_cmd);
 		}
 		break;

From 49cb77e297dc611a1b795cfeb79452b3002bd331 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Mon, 27 Mar 2017 16:12:43 -0700
Subject: [PATCH 112/410] target: Avoid mappedlun symlink creation during lun
 shutdown

This patch closes a race between se_lun deletion during configfs
unlink in target_fabric_port_unlink() -> core_dev_del_lun()
-> core_tpg_remove_lun(), when transport_clear_lun_ref() blocks
waiting for percpu_ref RCU grace period to finish, but a new
NodeACL mappedlun is added before the RCU grace period has
completed.

This can happen in target_fabric_mappedlun_link() because it
only checks for se_lun->lun_se_dev, which is not cleared until
after transport_clear_lun_ref() percpu_ref RCU grace period
finishes.

This bug originally manifested as NULL pointer dereference
OOPsen in target_stat_scsi_att_intr_port_show_attr_dev() on
v4.1.y code, because it dereferences lun->lun_se_dev without
a explicit NULL pointer check.

In post v4.1 code with target-core RCU conversion, the code
in target_stat_scsi_att_intr_port_show_attr_dev() no longer
uses se_lun->lun_se_dev, but the same race still exists.

To address the bug, go ahead and set se_lun>lun_shutdown as
early as possible in core_tpg_remove_lun(), and ensure new
NodeACL mappedlun creation in target_fabric_mappedlun_link()
fails during se_lun shutdown.

Reported-by: James Shen <jcs@datera.io>
Cc: James Shen <jcs@datera.io>
Tested-by: James Shen <jcs@datera.io>
Cc: stable@vger.kernel.org # 3.10+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_fabric_configfs.c | 5 +++++
 drivers/target/target_core_tpg.c             | 4 ++++
 include/target/target_core_base.h            | 1 +
 3 files changed, 10 insertions(+)

diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index d8a16ca6baa5..d1e6cab8e3d3 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -92,6 +92,11 @@ static int target_fabric_mappedlun_link(
 		pr_err("Source se_lun->lun_se_dev does not exist\n");
 		return -EINVAL;
 	}
+	if (lun->lun_shutdown) {
+		pr_err("Unable to create mappedlun symlink because"
+			" lun->lun_shutdown=true\n");
+		return -EINVAL;
+	}
 	se_tpg = lun->lun_tpg;
 
 	nacl_ci = &lun_acl_ci->ci_parent->ci_group->cg_item;
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index 6fb191914f45..dfaef4d3b2d2 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -642,6 +642,8 @@ void core_tpg_remove_lun(
 	 */
 	struct se_device *dev = rcu_dereference_raw(lun->lun_se_dev);
 
+	lun->lun_shutdown = true;
+
 	core_clear_lun_from_tpg(lun, tpg);
 	/*
 	 * Wait for any active I/O references to percpu se_lun->lun_ref to
@@ -663,6 +665,8 @@ void core_tpg_remove_lun(
 	}
 	if (!(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE))
 		hlist_del_rcu(&lun->link);
+
+	lun->lun_shutdown = false;
 	mutex_unlock(&tpg->tpg_lun_mutex);
 
 	percpu_ref_exit(&lun->lun_ref);
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 4b784b6e21c0..2e282461cfa5 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -705,6 +705,7 @@ struct se_lun {
 	u64			unpacked_lun;
 #define SE_LUN_LINK_MAGIC			0xffff7771
 	u32			lun_link_magic;
+	bool			lun_shutdown;
 	bool			lun_access_ro;
 	u32			lun_index;
 

From ab22d2604c86ceb01bb2725c9860b88a7dd383bb Mon Sep 17 00:00:00 2001
From: Xiubo Li <lixiubo@cmss.chinamobile.com>
Date: Mon, 27 Mar 2017 17:07:40 +0800
Subject: [PATCH 113/410] tcmu: Fix possible overwrite of t_data_sg's last
 iov[]

If there has BIDI data, its first iov[] will overwrite the last
iov[] for se_cmd->t_data_sg.

To fix this, we can just increase the iov pointer, but this may
introuduce a new memory leakage bug: If the se_cmd->data_length
and se_cmd->t_bidi_data_sg->length are all not aligned up to the
DATA_BLOCK_SIZE, the actual length needed maybe larger than just
sum of them.

So, this could be avoided by rounding all the data lengthes up
to DATA_BLOCK_SIZE.

Reviewed-by: Mike Christie <mchristi@redhat.com>
Tested-by: Ilias Tsitsimpis <iliastsi@arrikto.com>
Reviewed-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Signed-off-by: Xiubo Li <lixiubo@cmss.chinamobile.com>
Cc: stable@vger.kernel.org # 3.18+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 34 +++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 6a17c78e4662..e58dfd4fe448 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -394,6 +394,20 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size, size_t d
 	return true;
 }
 
+static inline size_t tcmu_cmd_get_data_length(struct tcmu_cmd *tcmu_cmd)
+{
+	struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
+	size_t data_length = round_up(se_cmd->data_length, DATA_BLOCK_SIZE);
+
+	if (se_cmd->se_cmd_flags & SCF_BIDI) {
+		BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents));
+		data_length += round_up(se_cmd->t_bidi_data_sg->length,
+				DATA_BLOCK_SIZE);
+	}
+
+	return data_length;
+}
+
 static sense_reason_t
 tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 {
@@ -407,7 +421,7 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	uint32_t cmd_head;
 	uint64_t cdb_off;
 	bool copy_to_data_area;
-	size_t data_length;
+	size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd);
 	DECLARE_BITMAP(old_bitmap, DATA_BLOCK_BITS);
 
 	if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags))
@@ -433,11 +447,6 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 
 	mb = udev->mb_addr;
 	cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
-	data_length = se_cmd->data_length;
-	if (se_cmd->se_cmd_flags & SCF_BIDI) {
-		BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents));
-		data_length += se_cmd->t_bidi_data_sg->length;
-	}
 	if ((command_size > (udev->cmdr_size / 2)) ||
 	    data_length > udev->data_size) {
 		pr_warn("TCMU: Request of size %zu/%zu is too big for %u/%zu "
@@ -511,11 +520,14 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	entry->req.iov_dif_cnt = 0;
 
 	/* Handle BIDI commands */
-	iov_cnt = 0;
-	alloc_and_scatter_data_area(udev, se_cmd->t_bidi_data_sg,
-		se_cmd->t_bidi_data_nents, &iov, &iov_cnt, false);
-	entry->req.iov_bidi_cnt = iov_cnt;
-
+	if (se_cmd->se_cmd_flags & SCF_BIDI) {
+		iov_cnt = 0;
+		iov++;
+		alloc_and_scatter_data_area(udev, se_cmd->t_bidi_data_sg,
+				se_cmd->t_bidi_data_nents, &iov, &iov_cnt,
+				false);
+		entry->req.iov_bidi_cnt = iov_cnt;
+	}
 	/* cmd's data_bitmap is what changed in process */
 	bitmap_xor(tcmu_cmd->data_bitmap, old_bitmap, udev->data_bitmap,
 			DATA_BLOCK_BITS);

From abe342a5b4b5aa579f6bf40ba73447c699e6b579 Mon Sep 17 00:00:00 2001
From: Xiubo Li <lixiubo@cmss.chinamobile.com>
Date: Mon, 27 Mar 2017 17:07:41 +0800
Subject: [PATCH 114/410] tcmu: Fix wrongly calculating of the
 base_command_size

The t_data_nents and t_bidi_data_nents are the numbers of the
segments, but it couldn't be sure the block size equals to size
of the segment.

For the worst case, all the blocks are discontiguous and there
will need the same number of iovecs, that's to say: blocks == iovs.
So here just set the number of iovs to block count needed by tcmu
cmd.

Tested-by: Ilias Tsitsimpis <iliastsi@arrikto.com>
Reviewed-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Xiubo Li <lixiubo@cmss.chinamobile.com>
Cc: stable@vger.kernel.org # 3.18+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index e58dfd4fe448..9885d1b521fe 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -408,6 +408,13 @@ static inline size_t tcmu_cmd_get_data_length(struct tcmu_cmd *tcmu_cmd)
 	return data_length;
 }
 
+static inline uint32_t tcmu_cmd_get_block_cnt(struct tcmu_cmd *tcmu_cmd)
+{
+	size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd);
+
+	return data_length / DATA_BLOCK_SIZE;
+}
+
 static sense_reason_t
 tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 {
@@ -435,8 +442,7 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 	 * expensive to tell how many regions are freed in the bitmap
 	*/
 	base_command_size = max(offsetof(struct tcmu_cmd_entry,
-				req.iov[se_cmd->t_bidi_data_nents +
-					se_cmd->t_data_nents]),
+				req.iov[tcmu_cmd_get_block_cnt(tcmu_cmd)]),
 				sizeof(struct tcmu_cmd_entry));
 	command_size = base_command_size
 		+ round_up(scsi_command_size(se_cmd->t_task_cdb), TCMU_OP_ALIGN_SIZE);

From 8a146fbe1f1461aebc9b8f06a0f22e1a8a4f0dd1 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 24 Mar 2017 11:08:47 +0100
Subject: [PATCH 115/410] gpio: acpi: Call enable_irq_wake for _IAE GpioInts
 with Wake set

On Bay Trail / Cherry Trail systems with a LID switch, the LID switch is
often connect to a gpioint handled by an _IAE event handler.
Before this commit such systems would not wake up when opening the lid,
requiring the powerbutton to be pressed after opening the lid to wakeup.

Note that Bay Trail / Cherry Trail systems use suspend-to-idle, so
the interrupts are generated anyway on those lines on lid switch changes,
but they are treated by the IRQ subsystem as spurious while suspended if
not marked as wakeup IRQs.

This commit calls enable_irq_wake() for _IAE GpioInts with a valid
event handler which have their Wake flag set. This fixes such systems
not waking up when opening the lid.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-acpi.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 9b37a3692b3f..8e318f449d23 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -266,6 +266,9 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares,
 		goto fail_free_event;
 	}
 
+	if (agpio->wake_capable == ACPI_WAKE_CAPABLE)
+		enable_irq_wake(irq);
+
 	list_add_tail(&event->node, &acpi_gpio->events);
 	return AE_OK;
 
@@ -339,6 +342,9 @@ void acpi_gpiochip_free_interrupts(struct gpio_chip *chip)
 	list_for_each_entry_safe_reverse(event, ep, &acpi_gpio->events, node) {
 		struct gpio_desc *desc;
 
+		if (irqd_is_wakeup_set(irq_get_irq_data(event->irq)))
+			disable_irq_wake(event->irq);
+
 		free_irq(event->irq, event);
 		desc = event->desc;
 		if (WARN_ON(IS_ERR(desc)))

From 693bdaa164b40b7aa6018b98af6f7e40dbd52457 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 23 Mar 2017 13:21:38 -0700
Subject: [PATCH 116/410] ACPI / gpio: do not fall back to parsing _CRS when we
 get a deferral

If, while locating GPIOs by name, we get probe deferral, we should
immediately report it to caller rather than trying to fall back to parsing
unnamed GPIOs from _CRS block.

Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-and-Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-acpi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 8e318f449d23..2bd683e2be02 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -577,8 +577,10 @@ struct gpio_desc *acpi_find_gpio(struct device *dev,
 		}
 
 		desc = acpi_get_gpiod_by_index(adev, propname, idx, &info);
-		if (!IS_ERR(desc) || (PTR_ERR(desc) == -EPROBE_DEFER))
+		if (!IS_ERR(desc))
 			break;
+		if (PTR_ERR(desc) == -EPROBE_DEFER)
+			return ERR_CAST(desc);
 	}
 
 	/* Then from plain _CRS GPIOs */

From f7652afa8eadb416b23eb57dec6f158529942041 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Mon, 27 Mar 2017 11:09:08 +0200
Subject: [PATCH 117/410] drm/vmwgfx: Type-check lookups of fence objects

A malicious caller could otherwise hand over handles to other objects
causing all sorts of interesting problems.

Testing done: Ran a Fedora 25 desktop using both Xorg and
gnome-shell/Wayland.

Cc: <stable@vger.kernel.org>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 75 +++++++++++++++++----------
 1 file changed, 49 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 6541dd8b82dc..4076063e0fdd 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -538,7 +538,7 @@ int vmw_fence_create(struct vmw_fence_manager *fman,
 		     struct vmw_fence_obj **p_fence)
 {
 	struct vmw_fence_obj *fence;
-	int ret;
+ 	int ret;
 
 	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
 	if (unlikely(fence == NULL))
@@ -701,6 +701,41 @@ void vmw_fence_fifo_up(struct vmw_fence_manager *fman)
 }
 
 
+/**
+ * vmw_fence_obj_lookup - Look up a user-space fence object
+ *
+ * @tfile: A struct ttm_object_file identifying the caller.
+ * @handle: A handle identifying the fence object.
+ * @return: A struct vmw_user_fence base ttm object on success or
+ * an error pointer on failure.
+ *
+ * The fence object is looked up and type-checked. The caller needs
+ * to have opened the fence object first, but since that happens on
+ * creation and fence objects aren't shareable, that's not an
+ * issue currently.
+ */
+static struct ttm_base_object *
+vmw_fence_obj_lookup(struct ttm_object_file *tfile, u32 handle)
+{
+	struct ttm_base_object *base = ttm_base_object_lookup(tfile, handle);
+
+	if (!base) {
+		pr_err("Invalid fence object handle 0x%08lx.\n",
+		       (unsigned long)handle);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (base->refcount_release != vmw_user_fence_base_release) {
+		pr_err("Invalid fence object handle 0x%08lx.\n",
+		       (unsigned long)handle);
+		ttm_base_object_unref(&base);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return base;
+}
+
+
 int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_priv)
 {
@@ -726,13 +761,9 @@ int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data,
 		arg->kernel_cookie = jiffies + wait_timeout;
 	}
 
-	base = ttm_base_object_lookup(tfile, arg->handle);
-	if (unlikely(base == NULL)) {
-		printk(KERN_ERR "Wait invalid fence object handle "
-		       "0x%08lx.\n",
-		       (unsigned long)arg->handle);
-		return -EINVAL;
-	}
+	base = vmw_fence_obj_lookup(tfile, arg->handle);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
 
 	fence = &(container_of(base, struct vmw_user_fence, base)->fence);
 
@@ -771,13 +802,9 @@ int vmw_fence_obj_signaled_ioctl(struct drm_device *dev, void *data,
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
 	struct vmw_private *dev_priv = vmw_priv(dev);
 
-	base = ttm_base_object_lookup(tfile, arg->handle);
-	if (unlikely(base == NULL)) {
-		printk(KERN_ERR "Fence signaled invalid fence object handle "
-		       "0x%08lx.\n",
-		       (unsigned long)arg->handle);
-		return -EINVAL;
-	}
+	base = vmw_fence_obj_lookup(tfile, arg->handle);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
 
 	fence = &(container_of(base, struct vmw_user_fence, base)->fence);
 	fman = fman_from_fence(fence);
@@ -1024,6 +1051,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
 		(struct drm_vmw_fence_event_arg *) data;
 	struct vmw_fence_obj *fence = NULL;
 	struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
+	struct ttm_object_file *tfile = vmw_fp->tfile;
 	struct drm_vmw_fence_rep __user *user_fence_rep =
 		(struct drm_vmw_fence_rep __user *)(unsigned long)
 		arg->fence_rep;
@@ -1037,15 +1065,11 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
 	 */
 	if (arg->handle) {
 		struct ttm_base_object *base =
-			ttm_base_object_lookup_for_ref(dev_priv->tdev,
-						       arg->handle);
+			vmw_fence_obj_lookup(tfile, arg->handle);
+
+		if (IS_ERR(base))
+			return PTR_ERR(base);
 
-		if (unlikely(base == NULL)) {
-			DRM_ERROR("Fence event invalid fence object handle "
-				  "0x%08lx.\n",
-				  (unsigned long)arg->handle);
-			return -EINVAL;
-		}
 		fence = &(container_of(base, struct vmw_user_fence,
 				       base)->fence);
 		(void) vmw_fence_obj_reference(fence);
@@ -1053,7 +1077,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
 		if (user_fence_rep != NULL) {
 			bool existed;
 
-			ret = ttm_ref_object_add(vmw_fp->tfile, base,
+			ret = ttm_ref_object_add(tfile, base,
 						 TTM_REF_USAGE, &existed);
 			if (unlikely(ret != 0)) {
 				DRM_ERROR("Failed to reference a fence "
@@ -1097,8 +1121,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
 	return 0;
 out_no_create:
 	if (user_fence_rep != NULL)
-		ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile,
-					  handle, TTM_REF_USAGE);
+		ttm_ref_object_base_unref(tfile, handle, TTM_REF_USAGE);
 out_no_ref_obj:
 	vmw_fence_obj_unreference(&fence);
 	return ret;

From 36274ab8c596f1240c606bb514da329add2a1bcd Mon Sep 17 00:00:00 2001
From: Murray McAllister <murray.mcallister@insomniasec.com>
Date: Mon, 27 Mar 2017 11:12:53 +0200
Subject: [PATCH 118/410] drm/vmwgfx: NULL pointer dereference in
 vmw_surface_define_ioctl()

Before memory allocations vmw_surface_define_ioctl() checks the
upper-bounds of a user-supplied size, but does not check if the
supplied size is 0.

Add check to avoid NULL pointer dereferences.

Cc: <stable@vger.kernel.org>
Signed-off-by: Murray McAllister <murray.mcallister@insomniasec.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index b445ce9b9757..f410502cb075 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -716,8 +716,8 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
 		num_sizes += req->mip_levels[i];
 
-	if (num_sizes > DRM_VMW_MAX_SURFACE_FACES *
-	    DRM_VMW_MAX_MIP_LEVELS)
+	if (num_sizes > DRM_VMW_MAX_SURFACE_FACES * DRM_VMW_MAX_MIP_LEVELS ||
+	    num_sizes == 0)
 		return -EINVAL;
 
 	size = vmw_user_surface_size + 128 +

From 63774069d9527a1aeaa4aa20e929ef5e8e9ecc38 Mon Sep 17 00:00:00 2001
From: Murray McAllister <murray.mcallister@insomniasec.com>
Date: Mon, 27 Mar 2017 11:15:12 +0200
Subject: [PATCH 119/410] drm/vmwgfx: avoid calling vzalloc with a 0 size in
 vmw_get_cap_3d_ioctl()

In vmw_get_cap_3d_ioctl(), a user can supply 0 for a size that is
used in vzalloc(). This eventually calls dump_stack() (in warn_alloc()),
which can leak useful addresses to dmesg.

Add check to avoid a size of 0.

Cc: <stable@vger.kernel.org>
Signed-off-by: Murray McAllister <murray.mcallister@insomniasec.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index b8c6a03c8c54..1802d0e7fab8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -186,7 +186,7 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
 	bool gb_objects = !!(dev_priv->capabilities & SVGA_CAP_GBOBJECTS);
 	struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
 
-	if (unlikely(arg->pad64 != 0)) {
+	if (unlikely(arg->pad64 != 0 || arg->max_size == 0)) {
 		DRM_ERROR("Illegal GET_3D_CAP argument.\n");
 		return -EINVAL;
 	}

From fe25deb7737ce6c0879ccf79c99fa1221d428bf2 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Mon, 27 Mar 2017 11:21:25 +0200
Subject: [PATCH 120/410] drm/ttm, drm/vmwgfx: Relax permission checking when
 opening surfaces

Previously, when a surface was opened using a legacy (non prime) handle,
it was verified to have been created by a client in the same master realm.
Relax this so that opening is also allowed recursively if the client
already has the surface open.

This works around a regression in svga mesa where opening of a shared
surface is used recursively to obtain surface information.

Cc: <stable@vger.kernel.org>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>
---
 drivers/gpu/drm/ttm/ttm_object.c         | 10 +++++++---
 drivers/gpu/drm/vmwgfx/vmwgfx_fence.c    |  6 ++----
 drivers/gpu/drm/vmwgfx/vmwgfx_resource.c |  4 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_surface.c  | 22 +++++++++-------------
 include/drm/ttm/ttm_object.h             |  5 ++++-
 5 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c
index fdb451e3ec01..d750140bafbc 100644
--- a/drivers/gpu/drm/ttm/ttm_object.c
+++ b/drivers/gpu/drm/ttm/ttm_object.c
@@ -179,7 +179,7 @@ int ttm_base_object_init(struct ttm_object_file *tfile,
 	if (unlikely(ret != 0))
 		goto out_err0;
 
-	ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL);
+	ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, false);
 	if (unlikely(ret != 0))
 		goto out_err1;
 
@@ -318,7 +318,8 @@ EXPORT_SYMBOL(ttm_ref_object_exists);
 
 int ttm_ref_object_add(struct ttm_object_file *tfile,
 		       struct ttm_base_object *base,
-		       enum ttm_ref_type ref_type, bool *existed)
+		       enum ttm_ref_type ref_type, bool *existed,
+		       bool require_existed)
 {
 	struct drm_open_hash *ht = &tfile->ref_hash[ref_type];
 	struct ttm_ref_object *ref;
@@ -345,6 +346,9 @@ int ttm_ref_object_add(struct ttm_object_file *tfile,
 		}
 
 		rcu_read_unlock();
+		if (require_existed)
+			return -EPERM;
+
 		ret = ttm_mem_global_alloc(mem_glob, sizeof(*ref),
 					   false, false);
 		if (unlikely(ret != 0))
@@ -635,7 +639,7 @@ int ttm_prime_fd_to_handle(struct ttm_object_file *tfile,
 	prime = (struct ttm_prime_object *) dma_buf->priv;
 	base = &prime->base;
 	*handle = base->hash.key;
-	ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL);
+	ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, false);
 
 	dma_buf_put(dma_buf);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 4076063e0fdd..6b2708b4eafe 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -1075,10 +1075,8 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
 		(void) vmw_fence_obj_reference(fence);
 
 		if (user_fence_rep != NULL) {
-			bool existed;
-
-			ret = ttm_ref_object_add(tfile, base,
-						 TTM_REF_USAGE, &existed);
+			ret = ttm_ref_object_add(vmw_fp->tfile, base,
+						 TTM_REF_USAGE, NULL, false);
 			if (unlikely(ret != 0)) {
 				DRM_ERROR("Failed to reference a fence "
 					  "object.\n");
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 65b3f0369636..bf23153d4f55 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -589,7 +589,7 @@ static int vmw_user_dmabuf_synccpu_grab(struct vmw_user_dma_buffer *user_bo,
 		return ret;
 
 	ret = ttm_ref_object_add(tfile, &user_bo->prime.base,
-				 TTM_REF_SYNCCPU_WRITE, &existed);
+				 TTM_REF_SYNCCPU_WRITE, &existed, false);
 	if (ret != 0 || existed)
 		ttm_bo_synccpu_write_release(&user_bo->dma.base);
 
@@ -773,7 +773,7 @@ int vmw_user_dmabuf_reference(struct ttm_object_file *tfile,
 
 	*handle = user_bo->prime.base.hash.key;
 	return ttm_ref_object_add(tfile, &user_bo->prime.base,
-				  TTM_REF_USAGE, NULL);
+				  TTM_REF_USAGE, NULL, false);
 }
 
 /*
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index f410502cb075..adc023fe67f3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -891,17 +891,16 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv,
 	uint32_t handle;
 	struct ttm_base_object *base;
 	int ret;
+	bool require_exist = false;
 
 	if (handle_type == DRM_VMW_HANDLE_PRIME) {
 		ret = ttm_prime_fd_to_handle(tfile, u_handle, &handle);
 		if (unlikely(ret != 0))
 			return ret;
 	} else {
-		if (unlikely(drm_is_render_client(file_priv))) {
-			DRM_ERROR("Render client refused legacy "
-				  "surface reference.\n");
-			return -EACCES;
-		}
+		if (unlikely(drm_is_render_client(file_priv)))
+			require_exist = true;
+
 		if (ACCESS_ONCE(vmw_fpriv(file_priv)->locked_master)) {
 			DRM_ERROR("Locked master refused legacy "
 				  "surface reference.\n");
@@ -929,17 +928,14 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv,
 
 		/*
 		 * Make sure the surface creator has the same
-		 * authenticating master.
+		 * authenticating master, or is already registered with us.
 		 */
 		if (drm_is_primary_client(file_priv) &&
-		    user_srf->master != file_priv->master) {
-			DRM_ERROR("Trying to reference surface outside of"
-				  " master domain.\n");
-			ret = -EACCES;
-			goto out_bad_resource;
-		}
+		    user_srf->master != file_priv->master)
+			require_exist = true;
 
-		ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL);
+		ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL,
+					 require_exist);
 		if (unlikely(ret != 0)) {
 			DRM_ERROR("Could not add a reference to a surface.\n");
 			goto out_bad_resource;
diff --git a/include/drm/ttm/ttm_object.h b/include/drm/ttm/ttm_object.h
index ed953f98f0e1..1487011fe057 100644
--- a/include/drm/ttm/ttm_object.h
+++ b/include/drm/ttm/ttm_object.h
@@ -229,6 +229,8 @@ extern void ttm_base_object_unref(struct ttm_base_object **p_base);
  * @ref_type: The type of reference.
  * @existed: Upon completion, indicates that an identical reference object
  * already existed, and the refcount was upped on that object instead.
+ * @require_existed: Fail with -EPERM if an identical ref object didn't
+ * already exist.
  *
  * Checks that the base object is shareable and adds a ref object to it.
  *
@@ -243,7 +245,8 @@ extern void ttm_base_object_unref(struct ttm_base_object **p_base);
  */
 extern int ttm_ref_object_add(struct ttm_object_file *tfile,
 			      struct ttm_base_object *base,
-			      enum ttm_ref_type ref_type, bool *existed);
+			      enum ttm_ref_type ref_type, bool *existed,
+			      bool require_existed);
 
 extern bool ttm_ref_object_exists(struct ttm_object_file *tfile,
 				  struct ttm_base_object *base);

From 3ce7803cf3a7bc52c0eb9f516de8b72a0305ad57 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Mon, 27 Mar 2017 12:38:25 +0200
Subject: [PATCH 121/410] drm/ttm: Avoid calling drm_ht_remove from atomic
 context

On recent kernels, calling drm_ht_remove triggers a might_sleep() warning
from within vfree(). So avoid calling it from atomic context. The use-cases
we fix here are both from destructors so there should be no concurrent
use of the hash tables.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>
---
 drivers/gpu/drm/ttm/ttm_object.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c
index d750140bafbc..26a7ad0f4789 100644
--- a/drivers/gpu/drm/ttm/ttm_object.c
+++ b/drivers/gpu/drm/ttm/ttm_object.c
@@ -453,10 +453,10 @@ void ttm_object_file_release(struct ttm_object_file **p_tfile)
 		ttm_ref_object_release(&ref->kref);
 	}
 
+	spin_unlock(&tfile->lock);
 	for (i = 0; i < TTM_REF_NUM; ++i)
 		drm_ht_remove(&tfile->ref_hash[i]);
 
-	spin_unlock(&tfile->lock);
 	ttm_object_file_unref(&tfile);
 }
 EXPORT_SYMBOL(ttm_object_file_release);
@@ -533,9 +533,7 @@ void ttm_object_device_release(struct ttm_object_device **p_tdev)
 
 	*p_tdev = NULL;
 
-	spin_lock(&tdev->object_lock);
 	drm_ht_remove(&tdev->object_hash);
-	spin_unlock(&tdev->object_lock);
 
 	kfree(tdev);
 }

From 53e16798b0864464c5444a204e1bb93ae246c429 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Mon, 27 Mar 2017 13:06:05 +0200
Subject: [PATCH 122/410] drm/vmwgfx: Remove getparam error message

The mesa winsys sometimes uses unimplemented parameter requests to
check for features. Remove the error message to avoid bloating the
kernel log.

Cc: <stable@vger.kernel.org>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index 1802d0e7fab8..5ec24fd801cd 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -114,8 +114,6 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 		param->value = dev_priv->has_dx;
 		break;
 	default:
-		DRM_ERROR("Illegal vmwgfx get param request: %d\n",
-			  param->param);
 		return -EINVAL;
 	}
 

From e7e11f99564222d82f0ce84bd521e57d78a6b678 Mon Sep 17 00:00:00 2001
From: Li Qiang <liq3ea@gmail.com>
Date: Mon, 27 Mar 2017 20:10:53 -0700
Subject: [PATCH 123/410] drm/vmwgfx: fix integer overflow in
 vmw_surface_define_ioctl()

In vmw_surface_define_ioctl(), the 'num_sizes' is the sum of the
'req->mip_levels' array. This array can be assigned any value from
the user space. As both the 'num_sizes' and the array is uint32_t,
it is easy to make 'num_sizes' overflow. The later 'mip_levels' is
used as the loop count. This can lead an oob write. Add the check of
'req->mip_levels' to avoid this.

Cc: <stable@vger.kernel.org>
Signed-off-by: Li Qiang <liqiang6-s@360.cn>
Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index adc023fe67f3..05fa092c942b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -713,8 +713,11 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 			128;
 
 	num_sizes = 0;
-	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
+	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) {
+		if (req->mip_levels[i] > DRM_VMW_MAX_MIP_LEVELS)
+			return -EINVAL;
 		num_sizes += req->mip_levels[i];
+	}
 
 	if (num_sizes > DRM_VMW_MAX_SURFACE_FACES * DRM_VMW_MAX_MIP_LEVELS ||
 	    num_sizes == 0)

From f85726905745fb4f6e15c68e2ade9da5390f8d89 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@intel.com>
Date: Thu, 30 Mar 2017 11:32:05 +0800
Subject: [PATCH 124/410] drm/i915/gvt: exclude cfg space from failsafe mode

When test GVTg as below scenario:
  VM boot --> failsafe --> kill qemu --> VM boot.
Qemu report error at the second boot:
  ERROR: PCI region size must be pow2 type=0x0, size=0x1fa1000

Qemu need access PCI_ROM_ADDRESS reg to determine the size of expansion
PCI rom. The mechanism just like the BAR reg (write-read) and we should
return the size 0 since we have no rom. If we reject the write to
PCI_ROM_ADDRESS, Qemu cannot get the correct size of rom.

Essentially, GVTg failsafe mode should not break PCI function. So we
exclude cfg space from failsafe mode. This can fix above issue.

v2: add Fixes and Bugzilla link.

Fixes: fd64be636708d ("drm/i915/gvt: introduced failsafe mode into vgpu")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100296
Signed-off-by: Changbin Du <changbin.du@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/cfg_space.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c
index b7d7721e72fa..40af17ec6312 100644
--- a/drivers/gpu/drm/i915/gvt/cfg_space.c
+++ b/drivers/gpu/drm/i915/gvt/cfg_space.c
@@ -285,9 +285,6 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,
 {
 	int ret;
 
-	if (vgpu->failsafe)
-		return 0;
-
 	if (WARN_ON(bytes > 4))
 		return -EINVAL;
 

From ac77a0c463c1d7d659861f7b6d1261970dd3282a Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 30 Mar 2017 14:20:45 +0900
Subject: [PATCH 125/410] block: do not put mq context in
 blk_mq_alloc_request_hctx

In blk_mq_alloc_request_hctx, blk_mq_sched_get_request doesn't
get sw context so we don't need to put the context with
blk_mq_put_ctx. Unless, we will see preempt counter underflow.

Cc: Omar Sandoval <osandov@fb.com>
Signed-off-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6b6e7bc041db..935f2cc7c8c3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -321,7 +321,6 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
 
 	rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
 
-	blk_mq_put_ctx(alloc_data.ctx);
 	blk_queue_exit(q);
 
 	if (!rq)

From d43e85b7d7c759a01f92fa09bba5d6eb6e406795 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 28 Mar 2017 12:15:41 +0200
Subject: [PATCH 126/410] ARM: orion5x: only call into phylib when available

Board code cannot call mdiobus_register_board_info() when phylib
or mdio_device is a loadable module:

arch/arm/plat-orion/common.o: In function `orion_ge00_switch_init':
:(.init.text+0x474): undefined reference to `mdiobus_register_board_info'

I had a number of ideas for how this could be solved, but after the MDIO
code got split out from PHYLIB it has gotten too hard, so I'm basically
giving up, and only call the mdiobus_register_board_info() function
if the MDIO layer is built-in to avoid the link error. This is similar
to how we handle PHY registration on other ARM platforms.

Fixes: 90eff9096c01 ("net: phy: Allow splitting MDIO bus/device support from PHYs")
Fixes: 648ea0134069 ("net: phy: Allow pre-declaration of MDIO devices")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
---
 arch/arm/mach-orion5x/Kconfig | 1 +
 arch/arm/plat-orion/common.c  | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/arch/arm/mach-orion5x/Kconfig b/arch/arm/mach-orion5x/Kconfig
index 633442ad4e4c..2a7bb6ccdcb7 100644
--- a/arch/arm/mach-orion5x/Kconfig
+++ b/arch/arm/mach-orion5x/Kconfig
@@ -6,6 +6,7 @@ menuconfig ARCH_ORION5X
 	select GPIOLIB
 	select MVEBU_MBUS
 	select PCI
+	select PHYLIB if NETDEVICES
 	select PLAT_ORION_LEGACY
 	help
 	  Support for the following Marvell Orion 5x series SoCs:
diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c
index 9255b6d67ba5..aff6994950ba 100644
--- a/arch/arm/plat-orion/common.c
+++ b/arch/arm/plat-orion/common.c
@@ -468,6 +468,7 @@ void __init orion_ge11_init(struct mv643xx_eth_platform_data *eth_data,
 		    eth_data, &orion_ge11);
 }
 
+#ifdef CONFIG_ARCH_ORION5X
 /*****************************************************************************
  * Ethernet switch
  ****************************************************************************/
@@ -480,6 +481,9 @@ void __init orion_ge00_switch_init(struct dsa_chip_data *d)
 	struct mdio_board_info *bd;
 	unsigned int i;
 
+	if (!IS_BUILTIN(CONFIG_PHYLIB))
+		return;
+
 	for (i = 0; i < ARRAY_SIZE(d->port_names); i++)
 		if (!strcmp(d->port_names[i], "cpu"))
 			break;
@@ -493,6 +497,7 @@ void __init orion_ge00_switch_init(struct dsa_chip_data *d)
 
 	mdiobus_register_board_info(&orion_ge00_switch_board_info, 1);
 }
+#endif
 
 /*****************************************************************************
  * I2C

From 893dc68f1b18451e6d550b1884fc6be76e1bb90c Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Tue, 21 Mar 2017 09:24:11 -0500
Subject: [PATCH 127/410] rtlwifi: Fix scheduling while atomic splat

Following commit cceb0a597320 ("rtlwifi: Add work queue for c2h cmd."),
the following BUG is reported when rtl8723be is used:

BUG: sleeping function called from invalid context at mm/slab.h:432
in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/0
CPU: 0 PID: 0 Comm: swapper/0 Tainted: G        W  O    4.11.0-rc3-wl+ #276
Hardware name: TOSHIBA TECRA A50-A/TECRA A50-A, BIOS Version 4.50   09/29/2014
Call Trace:
 <IRQ>
 dump_stack+0x63/0x89
 ___might_sleep+0xe9/0x130
 __might_sleep+0x4a/0x90
 kmem_cache_alloc_trace+0x19f/0x200
 ? rtl_c2hcmd_enqueue+0x3e/0x110 [rtlwifi]
 rtl_c2hcmd_enqueue+0x3e/0x110 [rtlwifi]
 rtl8723be_c2h_packet_handler+0xac/0xc0 [rtl8723be]
 rtl8723be_rx_command_packet+0x37/0x5c [rtl8723be]
 _rtl_pci_rx_interrupt+0x200/0x6b0 [rtl_pci]
 _rtl_pci_interrupt+0x20c/0x5d0 [rtl_pci]
 __handle_irq_event_percpu+0x3f/0x1d0
 handle_irq_event_percpu+0x23/0x60
 handle_irq_event+0x3c/0x60
 handle_fasteoi_irq+0xa2/0x170
 handle_irq+0x20/0x30
 do_IRQ+0x48/0xd0
 common_interrupt+0x89/0x89
...

Although commit cceb0a597320 converted most c2h commands to use a work
queue, the Bluetooth coexistence routines can be in atomic mode when
they execute such a call.

Fixes: cceb0a597320 ("rtlwifi: Add work queue for c2h cmd.")
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/realtek/rtlwifi/base.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index caea350f05aa..bdc379178e87 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -1742,12 +1742,14 @@ void rtl_c2hcmd_enqueue(struct ieee80211_hw *hw, u8 tag, u8 len, u8 *val)
 	unsigned long flags;
 	struct rtl_c2hcmd *c2hcmd;
 
-	c2hcmd = kmalloc(sizeof(*c2hcmd), GFP_KERNEL);
+	c2hcmd = kmalloc(sizeof(*c2hcmd),
+			 in_interrupt() ? GFP_ATOMIC : GFP_KERNEL);
 
 	if (!c2hcmd)
 		goto label_err;
 
-	c2hcmd->val = kmalloc(len, GFP_KERNEL);
+	c2hcmd->val = kmalloc(len,
+			      in_interrupt() ? GFP_ATOMIC : GFP_KERNEL);
 
 	if (!c2hcmd->val)
 		goto label_err2;

From d77facb88448cdeaaa3adba5b9704a48ac2ac8d6 Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Tue, 28 Mar 2017 09:11:30 +0100
Subject: [PATCH 128/410] brcmfmac: use local iftype avoiding use-after-free of
 virtual interface

A use-after-free was found using KASAN. In brcmf_p2p_del_if() the virtual
interface is removed using call to brcmf_remove_interface(). After that
the virtual interface instance has been freed and should not be referenced.
Solve this by storing the nl80211 iftype in local variable, which is used
in a couple of places anyway.

Cc: stable@vger.kernel.org # 4.10.x, 4.9.x
Reported-by: Daniel J Blueman <daniel@quora.org>
Reviewed-by: Hante Meuleman <hante.meuleman@broadcom.com>
Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
index de19c7c92bc6..85d949e03f79 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
@@ -2238,14 +2238,16 @@ int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev)
 	struct brcmf_cfg80211_info *cfg = wiphy_priv(wiphy);
 	struct brcmf_p2p_info *p2p = &cfg->p2p;
 	struct brcmf_cfg80211_vif *vif;
+	enum nl80211_iftype iftype;
 	bool wait_for_disable = false;
 	int err;
 
 	brcmf_dbg(TRACE, "delete P2P vif\n");
 	vif = container_of(wdev, struct brcmf_cfg80211_vif, wdev);
 
+	iftype = vif->wdev.iftype;
 	brcmf_cfg80211_arm_vif_event(cfg, vif);
-	switch (vif->wdev.iftype) {
+	switch (iftype) {
 	case NL80211_IFTYPE_P2P_CLIENT:
 		if (test_bit(BRCMF_VIF_STATUS_DISCONNECTING, &vif->sme_state))
 			wait_for_disable = true;
@@ -2275,7 +2277,7 @@ int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev)
 					    BRCMF_P2P_DISABLE_TIMEOUT);
 
 	err = 0;
-	if (vif->wdev.iftype != NL80211_IFTYPE_P2P_DEVICE) {
+	if (iftype != NL80211_IFTYPE_P2P_DEVICE) {
 		brcmf_vif_clear_mgmt_ies(vif);
 		err = brcmf_p2p_release_p2p_if(vif);
 	}
@@ -2291,7 +2293,7 @@ int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev)
 	brcmf_remove_interface(vif->ifp, true);
 
 	brcmf_cfg80211_arm_vif_event(cfg, NULL);
-	if (vif->wdev.iftype != NL80211_IFTYPE_P2P_DEVICE)
+	if (iftype != NL80211_IFTYPE_P2P_DEVICE)
 		p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION].vif = NULL;
 
 	return err;

From 2b6867c2ce76c596676bec7d2d525af525fdc6e2 Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Wed, 29 Mar 2017 16:11:20 +0200
Subject: [PATCH 129/410] net/packet: fix overflow in check for priv area size

Subtracting tp_sizeof_priv from tp_block_size and casting to int
to check whether one is less then the other doesn't always work
(both of them are unsigned ints).

Compare them as is instead.

Also cast tp_sizeof_priv to u64 before using BLK_PLUS_PRIV, as
it can overflow inside BLK_PLUS_PRIV otherwise.

Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index a0dbe7ca8f72..2323ee35dc09 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4193,8 +4193,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 		if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
 			goto out;
 		if (po->tp_version >= TPACKET_V3 &&
-		    (int)(req->tp_block_size -
-			  BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0)
+		    req->tp_block_size <=
+			  BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv))
 			goto out;
 		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
 					po->tp_reserve))

From 8f8d28e4d6d815a391285e121c3a53a0b6cb9e7b Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Wed, 29 Mar 2017 16:11:21 +0200
Subject: [PATCH 130/410] net/packet: fix overflow in check for tp_frame_nr

When calculating rb->frames_per_block * req->tp_block_nr the result
can overflow.

Add a check that tp_block_size * tp_block_nr <= UINT_MAX.

Since frames_per_block <= tp_block_size, the expression would
never overflow.

Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2323ee35dc09..3ac286ebb2f4 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4205,6 +4205,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 		rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
 		if (unlikely(rb->frames_per_block == 0))
 			goto out;
+		if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr))
+			goto out;
 		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
 					req->tp_frame_nr))
 			goto out;

From bcc5364bdcfe131e6379363f089e7b4108d35b70 Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Wed, 29 Mar 2017 16:11:22 +0200
Subject: [PATCH 131/410] net/packet: fix overflow in check for tp_reserve

When calculating po->tp_hdrlen + po->tp_reserve the result can overflow.

Fix by checking that tp_reserve <= INT_MAX on assign.

Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3ac286ebb2f4..8489beff5c25 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3665,6 +3665,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 			return -EBUSY;
 		if (copy_from_user(&val, optval, sizeof(val)))
 			return -EFAULT;
+		if (val > INT_MAX)
+			return -EINVAL;
 		po->tp_reserve = val;
 		return 0;
 	}

From 3dbcc105d5561e18ccd0842c7baab1c835562a37 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 30 Mar 2017 01:00:53 +0800
Subject: [PATCH 132/410] sctp: alloc stream info when initializing asoc

When sending a msg without asoc established, sctp will send INIT packet
first and then enqueue chunks.

Before receiving INIT_ACK, stream info is not yet alloced. But enqueuing
chunks needs to access stream info, like out stream state and out stream
cnt.

This patch is to fix it by allocing out stream info when initializing an
asoc, allocing in stream and re-allocing out stream when processing init.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  3 ++-
 net/sctp/associola.c       |  7 ++++++-
 net/sctp/sm_make_chunk.c   |  9 ++------
 net/sctp/stream.c          | 43 +++++++++++++++++++++++++++++++-------
 4 files changed, 45 insertions(+), 17 deletions(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 8caa5ee9e290..a127b7c2c3c9 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -377,7 +377,8 @@ typedef struct sctp_sender_hb_info {
 	__u64 hb_nonce;
 } sctp_sender_hb_info_t;
 
-struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp);
+int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp);
+int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp);
 void sctp_stream_free(struct sctp_stream *stream);
 void sctp_stream_clear(struct sctp_stream *stream);
 
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 0439a1a68367..0b26df5f6188 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -246,6 +246,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 	if (!sctp_ulpq_init(&asoc->ulpq, asoc))
 		goto fail_init;
 
+	if (sctp_stream_new(asoc, gfp))
+		goto fail_init;
+
 	/* Assume that peer would support both address types unless we are
 	 * told otherwise.
 	 */
@@ -264,7 +267,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 	/* AUTH related initializations */
 	INIT_LIST_HEAD(&asoc->endpoint_shared_keys);
 	if (sctp_auth_asoc_copy_shkeys(ep, asoc, gfp))
-		goto fail_init;
+		goto stream_free;
 
 	asoc->active_key_id = ep->active_key_id;
 	asoc->prsctp_enable = ep->prsctp_enable;
@@ -287,6 +290,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 
 	return asoc;
 
+stream_free:
+	sctp_stream_free(asoc->stream);
 fail_init:
 	sock_put(asoc->base.sk);
 	sctp_endpoint_put(asoc->ep);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 969a30c7bb54..118faff6a332 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2460,15 +2460,10 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
 	 * association.
 	 */
 	if (!asoc->temp) {
-		int error;
-
-		asoc->stream = sctp_stream_new(asoc->c.sinit_max_instreams,
-					       asoc->c.sinit_num_ostreams, gfp);
-		if (!asoc->stream)
+		if (sctp_stream_init(asoc, gfp))
 			goto clean_up;
 
-		error = sctp_assoc_set_id(asoc, gfp);
-		if (error)
+		if (sctp_assoc_set_id(asoc, gfp))
 			goto clean_up;
 	}
 
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 1c6cc04fa3a4..bbed997e1c5f 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -35,33 +35,60 @@
 #include <net/sctp/sctp.h>
 #include <net/sctp/sm.h>
 
-struct sctp_stream *sctp_stream_new(__u16 incnt, __u16 outcnt, gfp_t gfp)
+int sctp_stream_new(struct sctp_association *asoc, gfp_t gfp)
 {
 	struct sctp_stream *stream;
 	int i;
 
 	stream = kzalloc(sizeof(*stream), gfp);
 	if (!stream)
-		return NULL;
+		return -ENOMEM;
 
-	stream->outcnt = outcnt;
+	stream->outcnt = asoc->c.sinit_num_ostreams;
 	stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp);
 	if (!stream->out) {
 		kfree(stream);
-		return NULL;
+		return -ENOMEM;
 	}
 	for (i = 0; i < stream->outcnt; i++)
 		stream->out[i].state = SCTP_STREAM_OPEN;
 
-	stream->incnt = incnt;
+	asoc->stream = stream;
+
+	return 0;
+}
+
+int sctp_stream_init(struct sctp_association *asoc, gfp_t gfp)
+{
+	struct sctp_stream *stream = asoc->stream;
+	int i;
+
+	/* Initial stream->out size may be very big, so free it and alloc
+	 * a new one with new outcnt to save memory.
+	 */
+	kfree(stream->out);
+	stream->outcnt = asoc->c.sinit_num_ostreams;
+	stream->out = kcalloc(stream->outcnt, sizeof(*stream->out), gfp);
+	if (!stream->out)
+		goto nomem;
+
+	for (i = 0; i < stream->outcnt; i++)
+		stream->out[i].state = SCTP_STREAM_OPEN;
+
+	stream->incnt = asoc->c.sinit_max_instreams;
 	stream->in = kcalloc(stream->incnt, sizeof(*stream->in), gfp);
 	if (!stream->in) {
 		kfree(stream->out);
-		kfree(stream);
-		return NULL;
+		goto nomem;
 	}
 
-	return stream;
+	return 0;
+
+nomem:
+	asoc->stream = NULL;
+	kfree(stream);
+
+	return -ENOMEM;
 }
 
 void sctp_stream_free(struct sctp_stream *stream)

From 2b83878dd74a7c73bedcb6600663c1c46836e8af Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Wed, 29 Mar 2017 15:44:47 -0700
Subject: [PATCH 133/410] xtensa: make __pa work with uncached KSEG addresses

When __pa is applied to virtual address in uncached KSEG region the
result is incorrect. Fix it by checking if the original address is in
the uncached KSEG and adjusting the result. It looks better than masking
off bits because pfn_valid would correctly work with new __pa results
and it may be made working in noMMU case, once we get definition for
uncached memory view.

This is required for the dma_common_mmap and DMA debug code to work
correctly: they both indirectly use __pa with coherent DMA addresses.
In case of DMA debug the visible effect is false reports that an address
mapped for DMA is accessed by CPU.

Cc: stable@vger.kernel.org
Tested-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/include/asm/page.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/xtensa/include/asm/page.h b/arch/xtensa/include/asm/page.h
index 976b1d70edbc..4ddbfd57a7c8 100644
--- a/arch/xtensa/include/asm/page.h
+++ b/arch/xtensa/include/asm/page.h
@@ -164,8 +164,21 @@ void copy_user_highpage(struct page *to, struct page *from,
 
 #define ARCH_PFN_OFFSET		(PHYS_OFFSET >> PAGE_SHIFT)
 
+#ifdef CONFIG_MMU
+static inline unsigned long ___pa(unsigned long va)
+{
+	unsigned long off = va - PAGE_OFFSET;
+
+	if (off >= XCHAL_KSEG_SIZE)
+		off -= XCHAL_KSEG_SIZE;
+
+	return off + PHYS_OFFSET;
+}
+#define __pa(x)	___pa((unsigned long)(x))
+#else
 #define __pa(x)	\
 	((unsigned long) (x) - PAGE_OFFSET + PHYS_OFFSET)
+#endif
 #define __va(x)	\
 	((void *)((unsigned long) (x) - PHYS_OFFSET + PAGE_OFFSET))
 #define pfn_valid(pfn) \

From 0b98ca2a45e35dfe02f4512fa30b9f5a9900cb29 Mon Sep 17 00:00:00 2001
From: Suresh Reddy <suresh.reddy@broadcom.com>
Date: Thu, 30 Mar 2017 00:58:32 -0400
Subject: [PATCH 134/410] be2net: Fix endian issue in logical link config
 command

Use cpu_to_le32() for link_config variable in set_logical_link_config
command as this variable is of type u32.

Signed-off-by: Suresh Reddy <suresh.reddy@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be_cmds.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 30e855004c57..02dd5246dfae 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -4939,8 +4939,9 @@ static int
 __be_cmd_set_logical_link_config(struct be_adapter *adapter,
 				 int link_state, int version, u8 domain)
 {
-	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_set_ll_link *req;
+	struct be_mcc_wrb *wrb;
+	u32 link_config = 0;
 	int status;
 
 	mutex_lock(&adapter->mcc_lock);
@@ -4962,10 +4963,12 @@ __be_cmd_set_logical_link_config(struct be_adapter *adapter,
 
 	if (link_state == IFLA_VF_LINK_STATE_ENABLE ||
 	    link_state == IFLA_VF_LINK_STATE_AUTO)
-		req->link_config |= PLINK_ENABLE;
+		link_config |= PLINK_ENABLE;
 
 	if (link_state == IFLA_VF_LINK_STATE_AUTO)
-		req->link_config |= PLINK_TRACK;
+		link_config |= PLINK_TRACK;
+
+	req->link_config = cpu_to_le32(link_config);
 
 	status = be_mcc_notify_wait(adapter);
 err:

From fa7e25cf13a6d0b82b5ed1008246f44d42e8422c Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Sun, 30 Oct 2016 17:28:16 -0700
Subject: [PATCH 135/410] target: Fix unknown fabric callback queue-full errors

This patch fixes a set of queue-full response handling
bugs, where outgoing responses are leaked when a fabric
driver is propagating non -EAGAIN or -ENOMEM errors
to target-core.

It introduces TRANSPORT_COMPLETE_QF_ERR state used to
signal when CHECK_CONDITION status should be generated,
when fabric driver ->write_pending(), ->queue_data_in(),
or ->queue_status() callbacks fail with non -EAGAIN or
-ENOMEM errors, and data-transfer should not be retried.

Note all fabric driver -EAGAIN and -ENOMEM errors are
still retried indefinately with associated data-transfer
callbacks, following existing queue-full logic.

Also fix two missing ->queue_status() queue-full cases
related to CMD_T_ABORTED w/ TAS status handling.

Reported-by: Potnuri Bharat Teja <bharat@chelsio.com>
Reviewed-by: Potnuri Bharat Teja <bharat@chelsio.com>
Tested-by: Potnuri Bharat Teja <bharat@chelsio.com>
Cc: Potnuri Bharat Teja <bharat@chelsio.com>
Reported-by: Steve Wise <swise@opengridcomputing.com>
Cc: Steve Wise <swise@opengridcomputing.com>
Cc: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_transport.c | 102 ++++++++++++++++---------
 include/target/target_core_base.h      |   1 +
 2 files changed, 69 insertions(+), 34 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index b1a3cdb29468..a0cd56ee5fe9 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -64,8 +64,9 @@ struct kmem_cache *t10_alua_lba_map_cache;
 struct kmem_cache *t10_alua_lba_map_mem_cache;
 
 static void transport_complete_task_attr(struct se_cmd *cmd);
+static int translate_sense_reason(struct se_cmd *cmd, sense_reason_t reason);
 static void transport_handle_queue_full(struct se_cmd *cmd,
-		struct se_device *dev);
+		struct se_device *dev, int err, bool write_pending);
 static int transport_put_cmd(struct se_cmd *cmd);
 static void target_complete_ok_work(struct work_struct *work);
 
@@ -804,7 +805,8 @@ void target_qf_do_work(struct work_struct *work)
 
 		if (cmd->t_state == TRANSPORT_COMPLETE_QF_WP)
 			transport_write_pending_qf(cmd);
-		else if (cmd->t_state == TRANSPORT_COMPLETE_QF_OK)
+		else if (cmd->t_state == TRANSPORT_COMPLETE_QF_OK ||
+			 cmd->t_state == TRANSPORT_COMPLETE_QF_ERR)
 			transport_complete_qf(cmd);
 	}
 }
@@ -1719,7 +1721,7 @@ void transport_generic_request_failure(struct se_cmd *cmd,
 		}
 		trace_target_cmd_complete(cmd);
 		ret = cmd->se_tfo->queue_status(cmd);
-		if (ret == -EAGAIN || ret == -ENOMEM)
+		if (ret)
 			goto queue_full;
 		goto check_stop;
 	default:
@@ -1730,7 +1732,7 @@ void transport_generic_request_failure(struct se_cmd *cmd,
 	}
 
 	ret = transport_send_check_condition_and_sense(cmd, sense_reason, 0);
-	if (ret == -EAGAIN || ret == -ENOMEM)
+	if (ret)
 		goto queue_full;
 
 check_stop:
@@ -1739,8 +1741,7 @@ check_stop:
 	return;
 
 queue_full:
-	cmd->t_state = TRANSPORT_COMPLETE_QF_OK;
-	transport_handle_queue_full(cmd, cmd->se_dev);
+	transport_handle_queue_full(cmd, cmd->se_dev, ret, false);
 }
 EXPORT_SYMBOL(transport_generic_request_failure);
 
@@ -1977,13 +1978,29 @@ static void transport_complete_qf(struct se_cmd *cmd)
 	int ret = 0;
 
 	transport_complete_task_attr(cmd);
+	/*
+	 * If a fabric driver ->write_pending() or ->queue_data_in() callback
+	 * has returned neither -ENOMEM or -EAGAIN, assume it's fatal and
+	 * the same callbacks should not be retried.  Return CHECK_CONDITION
+	 * if a scsi_status is not already set.
+	 *
+	 * If a fabric driver ->queue_status() has returned non zero, always
+	 * keep retrying no matter what..
+	 */
+	if (cmd->t_state == TRANSPORT_COMPLETE_QF_ERR) {
+		if (cmd->scsi_status)
+			goto queue_status;
 
-	if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) {
-		trace_target_cmd_complete(cmd);
-		ret = cmd->se_tfo->queue_status(cmd);
-		goto out;
+		cmd->se_cmd_flags |= SCF_EMULATED_TASK_SENSE;
+		cmd->scsi_status = SAM_STAT_CHECK_CONDITION;
+		cmd->scsi_sense_length  = TRANSPORT_SENSE_BUFFER;
+		translate_sense_reason(cmd, TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE);
+		goto queue_status;
 	}
 
+	if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE)
+		goto queue_status;
+
 	switch (cmd->data_direction) {
 	case DMA_FROM_DEVICE:
 		if (cmd->scsi_status)
@@ -2007,19 +2024,33 @@ queue_status:
 		break;
 	}
 
-out:
 	if (ret < 0) {
-		transport_handle_queue_full(cmd, cmd->se_dev);
+		transport_handle_queue_full(cmd, cmd->se_dev, ret, false);
 		return;
 	}
 	transport_lun_remove_cmd(cmd);
 	transport_cmd_check_stop_to_fabric(cmd);
 }
 
-static void transport_handle_queue_full(
-	struct se_cmd *cmd,
-	struct se_device *dev)
+static void transport_handle_queue_full(struct se_cmd *cmd, struct se_device *dev,
+					int err, bool write_pending)
 {
+	/*
+	 * -EAGAIN or -ENOMEM signals retry of ->write_pending() and/or
+	 * ->queue_data_in() callbacks from new process context.
+	 *
+	 * Otherwise for other errors, transport_complete_qf() will send
+	 * CHECK_CONDITION via ->queue_status() instead of attempting to
+	 * retry associated fabric driver data-transfer callbacks.
+	 */
+	if (err == -EAGAIN || err == -ENOMEM) {
+		cmd->t_state = (write_pending) ? TRANSPORT_COMPLETE_QF_WP :
+						 TRANSPORT_COMPLETE_QF_OK;
+	} else {
+		pr_warn_ratelimited("Got unknown fabric queue status: %d\n", err);
+		cmd->t_state = TRANSPORT_COMPLETE_QF_ERR;
+	}
+
 	spin_lock_irq(&dev->qf_cmd_lock);
 	list_add_tail(&cmd->se_qf_node, &cmd->se_dev->qf_cmd_list);
 	atomic_inc_mb(&dev->dev_qf_count);
@@ -2083,7 +2114,7 @@ static void target_complete_ok_work(struct work_struct *work)
 		WARN_ON(!cmd->scsi_status);
 		ret = transport_send_check_condition_and_sense(
 					cmd, 0, 1);
-		if (ret == -EAGAIN || ret == -ENOMEM)
+		if (ret)
 			goto queue_full;
 
 		transport_lun_remove_cmd(cmd);
@@ -2109,7 +2140,7 @@ static void target_complete_ok_work(struct work_struct *work)
 		} else if (rc) {
 			ret = transport_send_check_condition_and_sense(cmd,
 						rc, 0);
-			if (ret == -EAGAIN || ret == -ENOMEM)
+			if (ret)
 				goto queue_full;
 
 			transport_lun_remove_cmd(cmd);
@@ -2134,7 +2165,7 @@ queue_rsp:
 		if (target_read_prot_action(cmd)) {
 			ret = transport_send_check_condition_and_sense(cmd,
 						cmd->pi_err, 0);
-			if (ret == -EAGAIN || ret == -ENOMEM)
+			if (ret)
 				goto queue_full;
 
 			transport_lun_remove_cmd(cmd);
@@ -2144,7 +2175,7 @@ queue_rsp:
 
 		trace_target_cmd_complete(cmd);
 		ret = cmd->se_tfo->queue_data_in(cmd);
-		if (ret == -EAGAIN || ret == -ENOMEM)
+		if (ret)
 			goto queue_full;
 		break;
 	case DMA_TO_DEVICE:
@@ -2157,7 +2188,7 @@ queue_rsp:
 			atomic_long_add(cmd->data_length,
 					&cmd->se_lun->lun_stats.tx_data_octets);
 			ret = cmd->se_tfo->queue_data_in(cmd);
-			if (ret == -EAGAIN || ret == -ENOMEM)
+			if (ret)
 				goto queue_full;
 			break;
 		}
@@ -2166,7 +2197,7 @@ queue_rsp:
 queue_status:
 		trace_target_cmd_complete(cmd);
 		ret = cmd->se_tfo->queue_status(cmd);
-		if (ret == -EAGAIN || ret == -ENOMEM)
+		if (ret)
 			goto queue_full;
 		break;
 	default:
@@ -2180,8 +2211,8 @@ queue_status:
 queue_full:
 	pr_debug("Handling complete_ok QUEUE_FULL: se_cmd: %p,"
 		" data_direction: %d\n", cmd, cmd->data_direction);
-	cmd->t_state = TRANSPORT_COMPLETE_QF_OK;
-	transport_handle_queue_full(cmd, cmd->se_dev);
+
+	transport_handle_queue_full(cmd, cmd->se_dev, ret, false);
 }
 
 void target_free_sgl(struct scatterlist *sgl, int nents)
@@ -2449,18 +2480,14 @@ transport_generic_new_cmd(struct se_cmd *cmd)
 	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 
 	ret = cmd->se_tfo->write_pending(cmd);
-	if (ret == -EAGAIN || ret == -ENOMEM)
+	if (ret)
 		goto queue_full;
 
-	/* fabric drivers should only return -EAGAIN or -ENOMEM as error */
-	WARN_ON(ret);
-
-	return (!ret) ? 0 : TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+	return 0;
 
 queue_full:
 	pr_debug("Handling write_pending QUEUE__FULL: se_cmd: %p\n", cmd);
-	cmd->t_state = TRANSPORT_COMPLETE_QF_WP;
-	transport_handle_queue_full(cmd, cmd->se_dev);
+	transport_handle_queue_full(cmd, cmd->se_dev, ret, true);
 	return 0;
 }
 EXPORT_SYMBOL(transport_generic_new_cmd);
@@ -2470,10 +2497,10 @@ static void transport_write_pending_qf(struct se_cmd *cmd)
 	int ret;
 
 	ret = cmd->se_tfo->write_pending(cmd);
-	if (ret == -EAGAIN || ret == -ENOMEM) {
+	if (ret) {
 		pr_debug("Handling write_pending QUEUE__FULL: se_cmd: %p\n",
 			 cmd);
-		transport_handle_queue_full(cmd, cmd->se_dev);
+		transport_handle_queue_full(cmd, cmd->se_dev, ret, true);
 	}
 }
 
@@ -3011,6 +3038,8 @@ static int __transport_check_aborted_status(struct se_cmd *cmd, int send_status)
 	__releases(&cmd->t_state_lock)
 	__acquires(&cmd->t_state_lock)
 {
+	int ret;
+
 	assert_spin_locked(&cmd->t_state_lock);
 	WARN_ON_ONCE(!irqs_disabled());
 
@@ -3034,7 +3063,9 @@ static int __transport_check_aborted_status(struct se_cmd *cmd, int send_status)
 	trace_target_cmd_complete(cmd);
 
 	spin_unlock_irq(&cmd->t_state_lock);
-	cmd->se_tfo->queue_status(cmd);
+	ret = cmd->se_tfo->queue_status(cmd);
+	if (ret)
+		transport_handle_queue_full(cmd, cmd->se_dev, ret, false);
 	spin_lock_irq(&cmd->t_state_lock);
 
 	return 1;
@@ -3055,6 +3086,7 @@ EXPORT_SYMBOL(transport_check_aborted_status);
 void transport_send_task_abort(struct se_cmd *cmd)
 {
 	unsigned long flags;
+	int ret;
 
 	spin_lock_irqsave(&cmd->t_state_lock, flags);
 	if (cmd->se_cmd_flags & (SCF_SENT_CHECK_CONDITION)) {
@@ -3090,7 +3122,9 @@ send_abort:
 		 cmd->t_task_cdb[0], cmd->tag);
 
 	trace_target_cmd_complete(cmd);
-	cmd->se_tfo->queue_status(cmd);
+	ret = cmd->se_tfo->queue_status(cmd);
+	if (ret)
+		transport_handle_queue_full(cmd, cmd->se_dev, ret, false);
 }
 
 static void target_tmr_work(struct work_struct *work)
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 2e282461cfa5..730ed3055336 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -117,6 +117,7 @@ enum transport_state_table {
 	TRANSPORT_ISTATE_PROCESSING = 11,
 	TRANSPORT_COMPLETE_QF_WP = 18,
 	TRANSPORT_COMPLETE_QF_OK = 19,
+	TRANSPORT_COMPLETE_QF_ERR = 20,
 };
 
 /* Used for struct se_cmd->se_cmd_flags */

From a4467018c2a7228f4ef58051f0511bd037bff264 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Sun, 30 Oct 2016 17:30:08 -0700
Subject: [PATCH 136/410] iscsi-target: Propigate queue_data_in + queue_status
 errors

This patch changes iscsi-target to propagate iscsit_transport
->iscsit_queue_data_in() and ->iscsit_queue_status() callback
errors, back up into target-core.

This allows target-core to retry failed iscsit_transport
callbacks using internal queue-full logic.

Reported-by: Potnuri Bharat Teja <bharat@chelsio.com>
Reviewed-by: Potnuri Bharat Teja <bharat@chelsio.com>
Tested-by: Potnuri Bharat Teja <bharat@chelsio.com>
Cc: Potnuri Bharat Teja <bharat@chelsio.com>
Reported-by: Steve Wise <swise@opengridcomputing.com>
Cc: Steve Wise <swise@opengridcomputing.com>
Cc: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target.c          |  3 +--
 drivers/target/iscsi/iscsi_target_configfs.c | 13 +++++--------
 drivers/target/iscsi/iscsi_target_util.c     |  5 +++--
 drivers/target/iscsi/iscsi_target_util.h     |  2 +-
 4 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index a91802432f2f..e3f9ed3690b7 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -485,8 +485,7 @@ static void iscsit_get_rx_pdu(struct iscsi_conn *);
 
 int iscsit_queue_rsp(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 {
-	iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state);
-	return 0;
+	return iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state);
 }
 EXPORT_SYMBOL(iscsit_queue_rsp);
 
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index bf40f03755dd..344e8448869c 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -1398,11 +1398,10 @@ static u32 lio_sess_get_initiator_sid(
 static int lio_queue_data_in(struct se_cmd *se_cmd)
 {
 	struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+	struct iscsi_conn *conn = cmd->conn;
 
 	cmd->i_state = ISTATE_SEND_DATAIN;
-	cmd->conn->conn_transport->iscsit_queue_data_in(cmd->conn, cmd);
-
-	return 0;
+	return conn->conn_transport->iscsit_queue_data_in(conn, cmd);
 }
 
 static int lio_write_pending(struct se_cmd *se_cmd)
@@ -1431,16 +1430,14 @@ static int lio_write_pending_status(struct se_cmd *se_cmd)
 static int lio_queue_status(struct se_cmd *se_cmd)
 {
 	struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+	struct iscsi_conn *conn = cmd->conn;
 
 	cmd->i_state = ISTATE_SEND_STATUS;
 
 	if (cmd->se_cmd.scsi_status || cmd->sense_reason) {
-		iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state);
-		return 0;
+		return iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
 	}
-	cmd->conn->conn_transport->iscsit_queue_status(cmd->conn, cmd);
-
-	return 0;
+	return conn->conn_transport->iscsit_queue_status(conn, cmd);
 }
 
 static void lio_queue_tm_rsp(struct se_cmd *se_cmd)
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index b4640338f8d8..7d3e2fcc26a0 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -567,7 +567,7 @@ static void iscsit_remove_cmd_from_immediate_queue(
 	}
 }
 
-void iscsit_add_cmd_to_response_queue(
+int iscsit_add_cmd_to_response_queue(
 	struct iscsi_cmd *cmd,
 	struct iscsi_conn *conn,
 	u8 state)
@@ -578,7 +578,7 @@ void iscsit_add_cmd_to_response_queue(
 	if (!qr) {
 		pr_err("Unable to allocate memory for"
 			" struct iscsi_queue_req\n");
-		return;
+		return -ENOMEM;
 	}
 	INIT_LIST_HEAD(&qr->qr_list);
 	qr->cmd = cmd;
@@ -590,6 +590,7 @@ void iscsit_add_cmd_to_response_queue(
 	spin_unlock_bh(&conn->response_queue_lock);
 
 	wake_up(&conn->queues_wq);
+	return 0;
 }
 
 struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *conn)
diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h
index 8ff08856516a..9e4197af8708 100644
--- a/drivers/target/iscsi/iscsi_target_util.h
+++ b/drivers/target/iscsi/iscsi_target_util.h
@@ -31,7 +31,7 @@ extern int iscsit_find_cmd_for_recovery(struct iscsi_session *, struct iscsi_cmd
 			struct iscsi_conn_recovery **, itt_t);
 extern void iscsit_add_cmd_to_immediate_queue(struct iscsi_cmd *, struct iscsi_conn *, u8);
 extern struct iscsi_queue_req *iscsit_get_cmd_from_immediate_queue(struct iscsi_conn *);
-extern void iscsit_add_cmd_to_response_queue(struct iscsi_cmd *, struct iscsi_conn *, u8);
+extern int iscsit_add_cmd_to_response_queue(struct iscsi_cmd *, struct iscsi_conn *, u8);
 extern struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *);
 extern void iscsit_remove_cmd_from_tx_queues(struct iscsi_cmd *, struct iscsi_conn *);
 extern bool iscsit_conn_all_queues_empty(struct iscsi_conn *);

From 555a65f66c3c4d9dd46a565418b0b655d861a723 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Sun, 30 Oct 2016 18:58:39 -0700
Subject: [PATCH 137/410] iser-target: Fix queue-full response handling

This patch addresses two queue-full handling bugs in iser-target.

The first is propagating isert_rdma_rw_ctx_post() return back
to target-core via isert_put_datain() + isert_get_dataout()
callbacks, in order to trigger queue-full logic in target-core.
Note target-core expects -EAGAIN or -ENOMEM error to signal
RDMA WRITE/READ data-transfer callbacks should be retried,
after queue-full logic been invoked.

Other types of errors propagated up from RDMA RW API will result
in target-core generating internal CHECK_CONDITION status,
avoiding subsequent isert_put_datain() and isert_get_dataout()
iscsit_transport callback retry attempts.

The second is to use transport_generic_request_failure()
during T10-PI hw-offload errors in isert_rdma_write_done()
and isert_rdma_read_done(), so CHECK_CONDITION queue-full
is handled internally by target-core.

Also add isert_put_response() T10-PI failure case fixme in
isert_rdma_write_done(), which is currently not internally
retried or released until session reinstatement.

Reported-by: Potnuri Bharat Teja <bharat@chelsio.com>
Reviewed-by: Potnuri Bharat Teja <bharat@chelsio.com>
Tested-by: Potnuri Bharat Teja <bharat@chelsio.com>
Cc: Potnuri Bharat Teja <bharat@chelsio.com>
Reported-by: Steve Wise <swise@opengridcomputing.com>
Cc: Steve Wise <swise@opengridcomputing.com>
Cc: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/infiniband/ulp/isert/ib_isert.c | 53 ++++++++++++++++---------
 1 file changed, 35 insertions(+), 18 deletions(-)

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 91cbe86b25c8..9b33c0c97468 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -1659,10 +1659,23 @@ isert_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
 	ret = isert_check_pi_status(cmd, isert_cmd->rw.sig->sig_mr);
 	isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn);
 
-	if (ret)
-		transport_send_check_condition_and_sense(cmd, cmd->pi_err, 0);
-	else
-		isert_put_response(isert_conn->conn, isert_cmd->iscsi_cmd);
+	if (ret) {
+		/*
+		 * transport_generic_request_failure() expects to have
+		 * plus two references to handle queue-full, so re-add
+		 * one here as target-core will have already dropped
+		 * it after the first isert_put_datain() callback.
+		 */
+		kref_get(&cmd->cmd_kref);
+		transport_generic_request_failure(cmd, cmd->pi_err);
+	} else {
+		/*
+		 * XXX: isert_put_response() failure is not retried.
+		 */
+		ret = isert_put_response(isert_conn->conn, isert_cmd->iscsi_cmd);
+		if (ret)
+			pr_warn_ratelimited("isert_put_response() ret: %d\n", ret);
+	}
 }
 
 static void
@@ -1699,13 +1712,15 @@ isert_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
 	cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
 	spin_unlock_bh(&cmd->istate_lock);
 
-	if (ret) {
-		target_put_sess_cmd(se_cmd);
-		transport_send_check_condition_and_sense(se_cmd,
-							 se_cmd->pi_err, 0);
-	} else {
+	/*
+	 * transport_generic_request_failure() will drop the extra
+	 * se_cmd->cmd_kref reference after T10-PI error, and handle
+	 * any non-zero ->queue_status() callback error retries.
+	 */
+	if (ret)
+		transport_generic_request_failure(se_cmd, se_cmd->pi_err);
+	else
 		target_execute_cmd(se_cmd);
-	}
 }
 
 static void
@@ -2171,26 +2186,28 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 		chain_wr = &isert_cmd->tx_desc.send_wr;
 	}
 
-	isert_rdma_rw_ctx_post(isert_cmd, isert_conn, cqe, chain_wr);
-	isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ\n", isert_cmd);
-	return 1;
+	rc = isert_rdma_rw_ctx_post(isert_cmd, isert_conn, cqe, chain_wr);
+	isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ rc: %d\n",
+		  isert_cmd, rc);
+	return rc;
 }
 
 static int
 isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
 {
 	struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
+	int ret;
 
 	isert_dbg("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n",
 		 isert_cmd, cmd->se_cmd.data_length, cmd->write_data_done);
 
 	isert_cmd->tx_desc.tx_cqe.done = isert_rdma_read_done;
-	isert_rdma_rw_ctx_post(isert_cmd, conn->context,
-			&isert_cmd->tx_desc.tx_cqe, NULL);
+	ret = isert_rdma_rw_ctx_post(isert_cmd, conn->context,
+				     &isert_cmd->tx_desc.tx_cqe, NULL);
 
-	isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n",
-		 isert_cmd);
-	return 0;
+	isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE rc: %d\n",
+		 isert_cmd, ret);
+	return ret;
 }
 
 static int

From 7a56dc8888be23f44158a85b92da45d545cbf548 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Wed, 22 Mar 2017 17:07:30 +0200
Subject: [PATCH 138/410] iser-target: avoid posting a recv buffer twice

We pre-allocate our send-queues and might overflow them
in case we have multi work-request operations which tend
to occur for large RDMA transfers over devices with limited
allowed sg elements. When we get to a queue-full condition
we might retry again later, so track our receive buffers
so we don't repost them for a retry case.

Reported-by: Potnuri Bharat Teja <bharat@chelsio.com>
Tested-by: Potnuri Bharat Teja <bharat@chelsio.com>
Reviewed-by: Potnuri Bharat Teja <bharat@chelsio.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/infiniband/ulp/isert/ib_isert.c | 12 ++++++++++++
 drivers/infiniband/ulp/isert/ib_isert.h |  3 ++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 9b33c0c97468..fcbed35e95a8 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -817,6 +817,7 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count)
 		rx_wr->sg_list = &rx_desc->rx_sg;
 		rx_wr->num_sge = 1;
 		rx_wr->next = rx_wr + 1;
+		rx_desc->in_use = false;
 	}
 	rx_wr--;
 	rx_wr->next = NULL; /* mark end of work requests list */
@@ -835,6 +836,15 @@ isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc)
 	struct ib_recv_wr *rx_wr_failed, rx_wr;
 	int ret;
 
+	if (!rx_desc->in_use) {
+		/*
+		 * if the descriptor is not in-use we already reposted it
+		 * for recv, so just silently return
+		 */
+		return 0;
+	}
+
+	rx_desc->in_use = false;
 	rx_wr.wr_cqe = &rx_desc->rx_cqe;
 	rx_wr.sg_list = &rx_desc->rx_sg;
 	rx_wr.num_sge = 1;
@@ -1397,6 +1407,8 @@ isert_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 		return;
 	}
 
+	rx_desc->in_use = true;
+
 	ib_dma_sync_single_for_cpu(ib_dev, rx_desc->dma_addr,
 			ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
 
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index c02ada57d7f5..87d994de8c91 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -60,7 +60,7 @@
 
 #define ISER_RX_PAD_SIZE	(ISCSI_DEF_MAX_RECV_SEG_LEN + 4096 - \
 		(ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge) + \
-		 sizeof(struct ib_cqe)))
+		 sizeof(struct ib_cqe) + sizeof(bool)))
 
 #define ISCSI_ISER_SG_TABLESIZE		256
 
@@ -85,6 +85,7 @@ struct iser_rx_desc {
 	u64		dma_addr;
 	struct ib_sge	rx_sg;
 	struct ib_cqe	rx_cqe;
+	bool		in_use;
 	char		pad[ISER_RX_PAD_SIZE];
 } __packed;
 

From d19c4643a52f0a56a7ccc86b145f207a57f40116 Mon Sep 17 00:00:00 2001
From: Mike Christie <mchristi@redhat.com>
Date: Wed, 29 Mar 2017 00:19:24 -0500
Subject: [PATCH 139/410] target: Fix ALUA transition state race between
 multiple initiators

Multiple threads could be writing to alua_access_state at
the same time, or there could be multiple STPGs in flight
(different initiators sending them or one initiator sending
them to different ports), or a combo of both and the
core_alua_do_transition_tg_pt calls will race with each other.

Because from the last patches we no longer delay running
core_alua_do_transition_tg_pt_work, there does not seem to be
any point in running that in a workqueue. And, we always
wait for it to complete one way or another, so we can sleep
in this code path. So, this patch made over target-pending just adds a
mutex and does the work core_alua_do_transition_tg_pt_work was doing in
core_alua_do_transition_tg_pt.

There is also no need to use an atomic for the
tg_pt_gp_alua_access_state. In core_alua_do_transition_tg_pt we will
test and set it under the transition mutex. And, it is a int/32 bits
so in the other places where it is read, we will never see it partially
updated.

Signed-off-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_alua.c     | 154 ++++++++++----------------
 drivers/target/target_core_configfs.c |   2 +-
 include/target/target_core_base.h     |   8 +-
 3 files changed, 60 insertions(+), 104 deletions(-)

diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index fd7c16a7ca6e..fc4a9c303d55 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -197,8 +197,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd)
 		/*
 		 * Set the ASYMMETRIC ACCESS State
 		 */
-		buf[off++] |= (atomic_read(
-			&tg_pt_gp->tg_pt_gp_alua_access_state) & 0xff);
+		buf[off++] |= tg_pt_gp->tg_pt_gp_alua_access_state & 0xff;
 		/*
 		 * Set supported ASYMMETRIC ACCESS State bits
 		 */
@@ -710,7 +709,7 @@ target_alua_state_check(struct se_cmd *cmd)
 
 	spin_lock(&lun->lun_tg_pt_gp_lock);
 	tg_pt_gp = lun->lun_tg_pt_gp;
-	out_alua_state = atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
+	out_alua_state = tg_pt_gp->tg_pt_gp_alua_access_state;
 	nonop_delay_msecs = tg_pt_gp->tg_pt_gp_nonop_delay_msecs;
 
 	// XXX: keeps using tg_pt_gp witout reference after unlock
@@ -911,7 +910,7 @@ static int core_alua_write_tpg_metadata(
 }
 
 /*
- * Called with tg_pt_gp->tg_pt_gp_md_mutex held
+ * Called with tg_pt_gp->tg_pt_gp_transition_mutex held
  */
 static int core_alua_update_tpg_primary_metadata(
 	struct t10_alua_tg_pt_gp *tg_pt_gp)
@@ -934,7 +933,7 @@ static int core_alua_update_tpg_primary_metadata(
 			"alua_access_state=0x%02x\n"
 			"alua_access_status=0x%02x\n",
 			tg_pt_gp->tg_pt_gp_id,
-			tg_pt_gp->tg_pt_gp_alua_pending_state,
+			tg_pt_gp->tg_pt_gp_alua_access_state,
 			tg_pt_gp->tg_pt_gp_alua_access_status);
 
 	snprintf(path, ALUA_METADATA_PATH_LEN,
@@ -1013,13 +1012,52 @@ static void core_alua_queue_state_change_ua(struct t10_alua_tg_pt_gp *tg_pt_gp)
 	spin_unlock(&tg_pt_gp->tg_pt_gp_lock);
 }
 
-static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
+static int core_alua_do_transition_tg_pt(
+	struct t10_alua_tg_pt_gp *tg_pt_gp,
+	int new_state,
+	int explicit)
 {
-	struct t10_alua_tg_pt_gp *tg_pt_gp = container_of(work,
-		struct t10_alua_tg_pt_gp, tg_pt_gp_transition_work);
-	struct se_device *dev = tg_pt_gp->tg_pt_gp_dev;
-	bool explicit = (tg_pt_gp->tg_pt_gp_alua_access_status ==
-			 ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG);
+	int prev_state;
+
+	mutex_lock(&tg_pt_gp->tg_pt_gp_transition_mutex);
+	/* Nothing to be done here */
+	if (tg_pt_gp->tg_pt_gp_alua_access_state == new_state) {
+		mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
+		return 0;
+	}
+
+	if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION) {
+		mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
+		return -EAGAIN;
+	}
+
+	/*
+	 * Save the old primary ALUA access state, and set the current state
+	 * to ALUA_ACCESS_STATE_TRANSITION.
+	 */
+	prev_state = tg_pt_gp->tg_pt_gp_alua_access_state;
+	tg_pt_gp->tg_pt_gp_alua_access_state = ALUA_ACCESS_STATE_TRANSITION;
+	tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ?
+				ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG :
+				ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA;
+
+	core_alua_queue_state_change_ua(tg_pt_gp);
+
+	if (new_state == ALUA_ACCESS_STATE_TRANSITION) {
+		mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
+		return 0;
+	}
+
+	/*
+	 * Check for the optional ALUA primary state transition delay
+	 */
+	if (tg_pt_gp->tg_pt_gp_trans_delay_msecs != 0)
+		msleep_interruptible(tg_pt_gp->tg_pt_gp_trans_delay_msecs);
+
+	/*
+	 * Set the current primary ALUA access state to the requested new state
+	 */
+	tg_pt_gp->tg_pt_gp_alua_access_state = new_state;
 
 	/*
 	 * Update the ALUA metadata buf that has been allocated in
@@ -1034,93 +1072,19 @@ static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
 	 * struct file does NOT affect the actual ALUA transition.
 	 */
 	if (tg_pt_gp->tg_pt_gp_write_metadata) {
-		mutex_lock(&tg_pt_gp->tg_pt_gp_md_mutex);
 		core_alua_update_tpg_primary_metadata(tg_pt_gp);
-		mutex_unlock(&tg_pt_gp->tg_pt_gp_md_mutex);
 	}
-	/*
-	 * Set the current primary ALUA access state to the requested new state
-	 */
-	atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
-		   tg_pt_gp->tg_pt_gp_alua_pending_state);
 
 	pr_debug("Successful %s ALUA transition TG PT Group: %s ID: %hu"
 		" from primary access state %s to %s\n", (explicit) ? "explicit" :
 		"implicit", config_item_name(&tg_pt_gp->tg_pt_gp_group.cg_item),
 		tg_pt_gp->tg_pt_gp_id,
-		core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_previous_state),
-		core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_pending_state));
+		core_alua_dump_state(prev_state),
+		core_alua_dump_state(new_state));
 
 	core_alua_queue_state_change_ua(tg_pt_gp);
 
-	spin_lock(&dev->t10_alua.tg_pt_gps_lock);
-	atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt);
-	spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
-
-	if (tg_pt_gp->tg_pt_gp_transition_complete)
-		complete(tg_pt_gp->tg_pt_gp_transition_complete);
-}
-
-static int core_alua_do_transition_tg_pt(
-	struct t10_alua_tg_pt_gp *tg_pt_gp,
-	int new_state,
-	int explicit)
-{
-	struct se_device *dev = tg_pt_gp->tg_pt_gp_dev;
-	DECLARE_COMPLETION_ONSTACK(wait);
-
-	/* Nothing to be done here */
-	if (atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state) == new_state)
-		return 0;
-
-	if (explicit && new_state == ALUA_ACCESS_STATE_TRANSITION)
-		return -EAGAIN;
-
-	/*
-	 * Flush any pending transitions
-	 */
-	if (!explicit)
-		flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
-
-	/*
-	 * Save the old primary ALUA access state, and set the current state
-	 * to ALUA_ACCESS_STATE_TRANSITION.
-	 */
-	atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
-			ALUA_ACCESS_STATE_TRANSITION);
-	tg_pt_gp->tg_pt_gp_alua_access_status = (explicit) ?
-				ALUA_STATUS_ALTERED_BY_EXPLICIT_STPG :
-				ALUA_STATUS_ALTERED_BY_IMPLICIT_ALUA;
-
-	core_alua_queue_state_change_ua(tg_pt_gp);
-
-	if (new_state == ALUA_ACCESS_STATE_TRANSITION)
-		return 0;
-
-	tg_pt_gp->tg_pt_gp_alua_previous_state =
-		atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
-	tg_pt_gp->tg_pt_gp_alua_pending_state = new_state;
-
-	/*
-	 * Check for the optional ALUA primary state transition delay
-	 */
-	if (tg_pt_gp->tg_pt_gp_trans_delay_msecs != 0)
-		msleep_interruptible(tg_pt_gp->tg_pt_gp_trans_delay_msecs);
-
-	/*
-	 * Take a reference for workqueue item
-	 */
-	spin_lock(&dev->t10_alua.tg_pt_gps_lock);
-	atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
-	spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
-
-	schedule_work(&tg_pt_gp->tg_pt_gp_transition_work);
-	if (explicit) {
-		tg_pt_gp->tg_pt_gp_transition_complete = &wait;
-		wait_for_completion(&wait);
-		tg_pt_gp->tg_pt_gp_transition_complete = NULL;
-	}
-
+	mutex_unlock(&tg_pt_gp->tg_pt_gp_transition_mutex);
 	return 0;
 }
 
@@ -1685,14 +1649,12 @@ struct t10_alua_tg_pt_gp *core_alua_allocate_tg_pt_gp(struct se_device *dev,
 	}
 	INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_list);
 	INIT_LIST_HEAD(&tg_pt_gp->tg_pt_gp_lun_list);
-	mutex_init(&tg_pt_gp->tg_pt_gp_md_mutex);
+	mutex_init(&tg_pt_gp->tg_pt_gp_transition_mutex);
 	spin_lock_init(&tg_pt_gp->tg_pt_gp_lock);
 	atomic_set(&tg_pt_gp->tg_pt_gp_ref_cnt, 0);
-	INIT_WORK(&tg_pt_gp->tg_pt_gp_transition_work,
-		  core_alua_do_transition_tg_pt_work);
 	tg_pt_gp->tg_pt_gp_dev = dev;
-	atomic_set(&tg_pt_gp->tg_pt_gp_alua_access_state,
-		ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED);
+	tg_pt_gp->tg_pt_gp_alua_access_state =
+			ALUA_ACCESS_STATE_ACTIVE_OPTIMIZED;
 	/*
 	 * Enable both explicit and implicit ALUA support by default
 	 */
@@ -1797,8 +1759,6 @@ void core_alua_free_tg_pt_gp(
 	dev->t10_alua.alua_tg_pt_gps_counter--;
 	spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
 
-	flush_work(&tg_pt_gp->tg_pt_gp_transition_work);
-
 	/*
 	 * Allow a struct t10_alua_tg_pt_gp_member * referenced by
 	 * core_alua_get_tg_pt_gp_by_name() in
@@ -1938,8 +1898,8 @@ ssize_t core_alua_show_tg_pt_gp_info(struct se_lun *lun, char *page)
 			"Primary Access Status: %s\nTG Port Secondary Access"
 			" State: %s\nTG Port Secondary Access Status: %s\n",
 			config_item_name(tg_pt_ci), tg_pt_gp->tg_pt_gp_id,
-			core_alua_dump_state(atomic_read(
-					&tg_pt_gp->tg_pt_gp_alua_access_state)),
+			core_alua_dump_state(
+				tg_pt_gp->tg_pt_gp_alua_access_state),
 			core_alua_dump_status(
 				tg_pt_gp->tg_pt_gp_alua_access_status),
 			atomic_read(&lun->lun_tg_pt_secondary_offline) ?
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 38b5025e4c7a..70657fd56440 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -2392,7 +2392,7 @@ static ssize_t target_tg_pt_gp_alua_access_state_show(struct config_item *item,
 		char *page)
 {
 	return sprintf(page, "%d\n",
-		atomic_read(&to_tg_pt_gp(item)->tg_pt_gp_alua_access_state));
+		       to_tg_pt_gp(item)->tg_pt_gp_alua_access_state);
 }
 
 static ssize_t target_tg_pt_gp_alua_access_state_store(struct config_item *item,
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 730ed3055336..ccfad0e9c2cd 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -280,8 +280,6 @@ struct t10_alua_tg_pt_gp {
 	u16	tg_pt_gp_id;
 	int	tg_pt_gp_valid_id;
 	int	tg_pt_gp_alua_supported_states;
-	int	tg_pt_gp_alua_pending_state;
-	int	tg_pt_gp_alua_previous_state;
 	int	tg_pt_gp_alua_access_status;
 	int	tg_pt_gp_alua_access_type;
 	int	tg_pt_gp_nonop_delay_msecs;
@@ -290,18 +288,16 @@ struct t10_alua_tg_pt_gp {
 	int	tg_pt_gp_pref;
 	int	tg_pt_gp_write_metadata;
 	u32	tg_pt_gp_members;
-	atomic_t tg_pt_gp_alua_access_state;
+	int	tg_pt_gp_alua_access_state;
 	atomic_t tg_pt_gp_ref_cnt;
 	spinlock_t tg_pt_gp_lock;
-	struct mutex tg_pt_gp_md_mutex;
+	struct mutex tg_pt_gp_transition_mutex;
 	struct se_device *tg_pt_gp_dev;
 	struct config_group tg_pt_gp_group;
 	struct list_head tg_pt_gp_list;
 	struct list_head tg_pt_gp_lun_list;
 	struct se_lun *tg_pt_gp_alua_lun;
 	struct se_node_acl *tg_pt_gp_alua_nacl;
-	struct work_struct tg_pt_gp_transition_work;
-	struct completion *tg_pt_gp_transition_complete;
 };
 
 struct t10_vpd {

From 6c9a8cdad48a04795dbc35ac3370afa3180045ae Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 27 Mar 2017 21:21:43 +0100
Subject: [PATCH 140/410] drm/i915: Avoid lock dropping between rescheduling

Unlocking is dangerous. In this case we combine an early update to the
out-of-queue request, because we know that it will be inserted into the
correct FIFO priority-ordered slot when it becomes ready in the future.
However, given sufficient enthusiasm, it may become ready as we are
continuing to reschedule, and so may gazump the FIFO if we have since
dropped its spinlock. The result is that it may be executed too early,
before its dependencies.

v2: Move all work into the second phase over the topological sort. This
removes the shortcut on the out-of-rbtree request to ensure that we only
adjust its priority after adjusting all of its dependencies.

Fixes: 20311bd35060 ("drm/i915/scheduler: Execute requests in order of priorities")
Testcase: igt/gem_exec_whisper
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: <stable@vger.kernel.org> # v4.10+
Link: http://patchwork.freedesktop.org/patch/msgid/20170327202143.7972-1-chris@chris-wilson.co.uk
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
(cherry picked from commit a79a524e9260d4ffaff88348615e70fb3d393692)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 53 +++++++++++++++-----------------
 1 file changed, 24 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 91555d4e9129..47517a02f0a4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -670,15 +670,14 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 static struct intel_engine_cs *
 pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
 {
-	struct intel_engine_cs *engine;
+	struct intel_engine_cs *engine =
+		container_of(pt, struct drm_i915_gem_request, priotree)->engine;
+
+	GEM_BUG_ON(!locked);
 
-	engine = container_of(pt,
-			      struct drm_i915_gem_request,
-			      priotree)->engine;
 	if (engine != locked) {
-		if (locked)
-			spin_unlock_irq(&locked->timeline->lock);
-		spin_lock_irq(&engine->timeline->lock);
+		spin_unlock(&locked->timeline->lock);
+		spin_lock(&engine->timeline->lock);
 	}
 
 	return engine;
@@ -686,7 +685,7 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
 
 static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 {
-	struct intel_engine_cs *engine = NULL;
+	struct intel_engine_cs *engine;
 	struct i915_dependency *dep, *p;
 	struct i915_dependency stack;
 	LIST_HEAD(dfs);
@@ -720,26 +719,23 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 	list_for_each_entry_safe(dep, p, &dfs, dfs_link) {
 		struct i915_priotree *pt = dep->signaler;
 
-		list_for_each_entry(p, &pt->signalers_list, signal_link)
+		/* Within an engine, there can be no cycle, but we may
+		 * refer to the same dependency chain multiple times
+		 * (redundant dependencies are not eliminated) and across
+		 * engines.
+		 */
+		list_for_each_entry(p, &pt->signalers_list, signal_link) {
+			GEM_BUG_ON(p->signaler->priority < pt->priority);
 			if (prio > READ_ONCE(p->signaler->priority))
 				list_move_tail(&p->dfs_link, &dfs);
+		}
 
 		list_safe_reset_next(dep, p, dfs_link);
-		if (!RB_EMPTY_NODE(&pt->node))
-			continue;
-
-		engine = pt_lock_engine(pt, engine);
-
-		/* If it is not already in the rbtree, we can update the
-		 * priority inplace and skip over it (and its dependencies)
-		 * if it is referenced *again* as we descend the dfs.
-		 */
-		if (prio > pt->priority && RB_EMPTY_NODE(&pt->node)) {
-			pt->priority = prio;
-			list_del_init(&dep->dfs_link);
-		}
 	}
 
+	engine = request->engine;
+	spin_lock_irq(&engine->timeline->lock);
+
 	/* Fifo and depth-first replacement ensure our deps execute before us */
 	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
 		struct i915_priotree *pt = dep->signaler;
@@ -751,16 +747,15 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 		if (prio <= pt->priority)
 			continue;
 
-		GEM_BUG_ON(RB_EMPTY_NODE(&pt->node));
-
 		pt->priority = prio;
-		rb_erase(&pt->node, &engine->execlist_queue);
-		if (insert_request(pt, &engine->execlist_queue))
-			engine->execlist_first = &pt->node;
+		if (!RB_EMPTY_NODE(&pt->node)) {
+			rb_erase(&pt->node, &engine->execlist_queue);
+			if (insert_request(pt, &engine->execlist_queue))
+				engine->execlist_first = &pt->node;
+		}
 	}
 
-	if (engine)
-		spin_unlock_irq(&engine->timeline->lock);
+	spin_unlock_irq(&engine->timeline->lock);
 
 	/* XXX Do we need to preempt to make room for us and our deps? */
 }

From 729a0cd45c886a8d1ae0b3063b20d525fc729523 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Mon, 27 Mar 2017 17:41:02 +0800
Subject: [PATCH 141/410] drm/i915/gvt: adjust mem size for low resolution type

From commit d1a513be1f0a ("drm/i915/gvt: add resolution definition for vGPU
type"), small type has been restricted to small resolution, so not
require larger high GM size any more. Change to smaller 384M for more
VM creation with vGPU enabled which still perform reasonable workload.

Fixes: d1a513be1f0a ("drm/i915/gvt: add resolution definition for vGPU type")
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
(cherry picked from commit bf39ec335eb8cc51b4e1c9303ef92b380d204bb1)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gvt/vgpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 41cfa5ccae84..d8d128625331 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -72,7 +72,7 @@ static struct {
 	char *name;
 } vgpu_types[] = {
 /* Fixed vGPU type table */
-	{ MB_TO_BYTES(64), MB_TO_BYTES(512), 4, GVT_EDID_1024_768, "8" },
+	{ MB_TO_BYTES(64), MB_TO_BYTES(384), 4, GVT_EDID_1024_768, "8" },
 	{ MB_TO_BYTES(128), MB_TO_BYTES(512), 4, GVT_EDID_1920_1200, "4" },
 	{ MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, GVT_EDID_1920_1200, "2" },
 	{ MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, GVT_EDID_1920_1200, "1" },

From 9ba2a6261de49588714a25f49db80bbe961b870a Mon Sep 17 00:00:00 2001
From: Tina Zhang <tina.zhang@intel.com>
Date: Fri, 24 Mar 2017 01:56:54 -0400
Subject: [PATCH 142/410] drm/i915/gvt: remove the redundant info NULL check

The variable info is never NULL, which is checked by the caller. This
patch removes the redundant info NULL check logic.

Fixes: 695fbc08d80f ("drm/i915/gvt: replace the gvt_err with gvt_vgpu_err")
Signed-off-by: Tina Zhang <tina.zhang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
(cherry picked from commit 865f03d42ed0c90c9faf3301775176834ba13eba)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gvt/kvmgt.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index d641214578a7..2d92119b488c 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1340,13 +1340,6 @@ static int kvmgt_guest_init(struct mdev_device *mdev)
 
 static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
 {
-	struct intel_vgpu *vgpu = info->vgpu;
-
-	if (!info) {
-		gvt_vgpu_err("kvmgt_guest_info invalid\n");
-		return false;
-	}
-
 	kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
 	kvm_put_kvm(info->kvm);
 	kvmgt_protect_table_destroy(info);

From 1383aeca92b72f4179420820c3a64dfb5909cc97 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 30 Mar 2017 09:53:41 +0100
Subject: [PATCH 143/410] drm/i915: Ironlake do_idle_maps w/a may be called w/o
 struct_mutex

Since commit 1233e2db199d ("drm/i915: Move object backing storage
manipulation to its own locking"), i915_gem_object_put_pages() and
specifically the i915_gem_gtt_finish_pages() may be called from outside
of the struct_mutex and so we can no longer pass I915_WAIT_LOCKED to
i915_gem_wait_for_idle.

Fixes: 1233e2db199d ("drm/i915: Move object backing storage manipulation to its own locking")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: intel-gfx@lists.freedesktop.org
Cc: <stable@vger.kernel.org> # v4.10+
Link: http://patchwork.freedesktop.org/patch/msgid/20170330085341.20311-1-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
(cherry picked from commit 228ec87ccd040b620c467cd61d594bfaa4f8a12e)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 2801a4d56324..96e45a4d5441 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2704,7 +2704,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 
 	if (unlikely(ggtt->do_idle_maps)) {
-		if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) {
+		if (i915_gem_wait_for_idle(dev_priv, 0)) {
 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);

From ecf8e89917d600fe846ebda911a9e690c6babfd0 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 30 Mar 2017 12:16:14 +0100
Subject: [PATCH 144/410] drm/i915: Use a dummy timeline name for a signaled
 fence
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Michał Winiarski pointed out that the debugging infrastructure (such as
trace_dma_fence_release) likes to pretty print the timeline name, long
after we have freed the timeline. Our timelines currently live as part of
the GTT (due to the strict ordering we currently use through each) which
belong to the context. We aim to free the context and release its
hardware resources as soon as we able to (i.e. when the last
fence/request using it has been signaled and retired). As the
.get_timeline_name is purely a debug feature, rather than extending the
lifetime of the context, or splitting it into many different release
phases just to keep the name around, replace the timeline name with a
constant after the fence has been signaled. This avoids the potential
use-after-free.

Reported-by: Krzysztof Olinski <krzysztof.e.olinski@intel.com>
Fixes: 80b204bce8f2 ("drm/i915: Enable multiple timelines")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.10+
Link: http://patchwork.freedesktop.org/patch/msgid/20170330111614.29757-1-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Michał Winiarski <michal.winiarski@intel.com>
(cherry picked from commit 05506b5be081b728353f1612b05c8ff689772832)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_request.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index e7c3c0318ff6..da70bfe97ec5 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -37,6 +37,17 @@ static const char *i915_fence_get_driver_name(struct dma_fence *fence)
 
 static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
 {
+	/* The timeline struct (as part of the ppgtt underneath a context)
+	 * may be freed when the request is no longer in use by the GPU.
+	 * We could extend the life of a context to beyond that of all
+	 * fences, possibly keeping the hw resource around indefinitely,
+	 * or we just give them a false name. Since
+	 * dma_fence_ops.get_timeline_name is a debug feature, the occasional
+	 * lie seems justifiable.
+	 */
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return "signaled";
+
 	return to_request(fence)->timeline->common->name;
 }
 

From 7a0c5c5b834fb60764b494b0e39c239da3b0774b Mon Sep 17 00:00:00 2001
From: Dmitry Bilunov <kmeaw@yandex-team.ru>
Date: Thu, 30 Mar 2017 18:14:26 +0300
Subject: [PATCH 145/410] dm raid: fix NULL pointer dereference for raid1
 without bitmap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 4257e08 ("dm raid: support to change bitmap region size")
introduced a bitmap resize call during preresume phase. User can create
a DM device with "raid" target configured as raid1 with no metadata
devices to hold superblock/bitmap info. It can be achieved using the
following sequence:

  truncate -s 32M /dev/shm/raid-test
  LOOP=$(losetup --show -f /dev/shm/raid-test)
  dmsetup create raid-test-linear0 --table "0 1024 linear $LOOP 0"
  dmsetup create raid-test-linear1 --table "0 1024 linear $LOOP 1024"
  dmsetup create raid-test --table "0 1024 raid raid1 1 2048 2 - /dev/mapper/raid-test-linear0 - /dev/mapper/raid-test-linear1"

This results in the following crash:

[ 4029.110216] device-mapper: raid: Ignoring chunk size parameter for RAID 1
[ 4029.110217] device-mapper: raid: Choosing default region size of 4MiB
[ 4029.111349] md/raid1:mdX: active with 2 out of 2 mirrors
[ 4029.114770] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030
[ 4029.114802] IP: bitmap_resize+0x25/0x7c0 [md_mod]
[ 4029.114816] PGD 0
…
[ 4029.115059] Hardware name: Aquarius Pro P30 S85 BUY-866/B85M-E, BIOS 2304 05/25/2015
[ 4029.115079] task: ffff88015cc29a80 task.stack: ffffc90001a5c000
[ 4029.115097] RIP: 0010:bitmap_resize+0x25/0x7c0 [md_mod]
[ 4029.115112] RSP: 0018:ffffc90001a5fb68 EFLAGS: 00010246
[ 4029.115127] RAX: 0000000000000005 RBX: 0000000000000000 RCX: 0000000000000000
[ 4029.115146] RDX: 0000000000000000 RSI: 0000000000000400 RDI: 0000000000000000
[ 4029.115166] RBP: ffffc90001a5fc28 R08: 0000000800000000 R09: 00000008ffffffff
[ 4029.115185] R10: ffffea0005661600 R11: ffff88015cc29a80 R12: ffff88021231f058
[ 4029.115204] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[ 4029.115223] FS:  00007fe73a6b4740(0000) GS:ffff88021ea80000(0000) knlGS:0000000000000000
[ 4029.115245] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 4029.115261] CR2: 0000000000000030 CR3: 0000000159a74000 CR4: 00000000001426e0
[ 4029.115281] Call Trace:
[ 4029.115291]  ? raid_iterate_devices+0x63/0x80 [dm_raid]
[ 4029.115309]  ? dm_table_all_devices_attribute.isra.23+0x41/0x70 [dm_mod]
[ 4029.115329]  ? dm_table_set_restrictions+0x225/0x2d0 [dm_mod]
[ 4029.115346]  raid_preresume+0x81/0x2e0 [dm_raid]
[ 4029.115361]  dm_table_resume_targets+0x47/0xe0 [dm_mod]
[ 4029.115378]  dm_resume+0xa8/0xd0 [dm_mod]
[ 4029.115391]  dev_suspend+0x123/0x250 [dm_mod]
[ 4029.115405]  ? table_load+0x350/0x350 [dm_mod]
[ 4029.115419]  ctl_ioctl+0x1c2/0x490 [dm_mod]
[ 4029.115433]  dm_ctl_ioctl+0xe/0x20 [dm_mod]
[ 4029.115447]  do_vfs_ioctl+0x8d/0x5a0
[ 4029.115459]  ? ____fput+0x9/0x10
[ 4029.115470]  ? task_work_run+0x79/0xa0
[ 4029.115481]  SyS_ioctl+0x3c/0x70
[ 4029.115493]  entry_SYSCALL_64_fastpath+0x13/0x94

The raid_preresume() function incorrectly assumes that the raid_set has
a bitmap enabled if RT_FLAG_RS_BITMAP_LOADED is set.  But
RT_FLAG_RS_BITMAP_LOADED is getting set in __load_dirty_region_bitmap()
even if there is no bitmap present (and bitmap_load() happily returns 0
even if a bitmap isn't present).  So the only way forward in the
near-term is to check if the bitmap is present by seeing if
mddev->bitmap is not NULL after bitmap_load() has been called.

By doing so the above NULL pointer is avoided.

Fixes: 4257e08 ("dm raid: support to change bitmap region size")
Cc: stable@vger.kernel.org # v4.8+
Signed-off-by: Dmitry Bilunov <kmeaw@yandex-team.ru>
Signed-off-by: Andrey Smetanin <asmetanin@yandex-team.ru>
Acked-by: Heinz Mauelshagen <heinzm@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-raid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index f8564d63982f..1e217ba84d09 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3726,7 +3726,7 @@ static int raid_preresume(struct dm_target *ti)
 		return r;
 
 	/* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) */
-	if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) &&
+	if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap &&
 	    mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)) {
 		r = bitmap_resize(mddev->bitmap, mddev->dev_sectors,
 				  to_bytes(rs->requested_bitmap_chunk_sectors), 0);

From 86e3e83b443669dd2bcc5c8a83b23e3aa0694c0d Mon Sep 17 00:00:00 2001
From: Sami Tolvanen <samitolvanen@google.com>
Date: Fri, 31 Mar 2017 12:32:45 -0700
Subject: [PATCH 146/410] dm verity fec: fix bufio leaks

Buffers read through dm_bufio_read() were not released in all code paths.

Fixes: a739ff3f543a ("dm verity: add support for forward error correction")
Cc: stable@vger.kernel.org # v4.5+
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-verity-fec.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index c3cc04d89524..78f36012eaca 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -146,8 +146,6 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
 		block = fec_buffer_rs_block(v, fio, n, i);
 		res = fec_decode_rs8(v, fio, block, &par[offset], neras);
 		if (res < 0) {
-			dm_bufio_release(buf);
-
 			r = res;
 			goto error;
 		}
@@ -172,6 +170,8 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
 done:
 	r = corrected;
 error:
+	dm_bufio_release(buf);
+
 	if (r < 0 && neras)
 		DMERR_LIMIT("%s: FEC %llu: failed to correct: %d",
 			    v->data_dev->name, (unsigned long long)rsb, r);
@@ -269,7 +269,7 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
 					  &is_zero) == 0) {
 			/* skip known zero blocks entirely */
 			if (is_zero)
-				continue;
+				goto done;
 
 			/*
 			 * skip if we have already found the theoretical

From fdad4e7a876a2cb3d2c1f04e5418c324e79fffef Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sat, 1 Apr 2017 00:45:52 +0200
Subject: [PATCH 147/410] ACPI / scan: Prefer devices without _HID for _ADR
 matching

Commit c2a6bbaf0c5f (ACPI / scan: Prefer devices without _HID/_CID
for _ADR matching) added a list_empty(&adev->pnp.ids) check to
find_child_checks() so as to catch situations in which the ACPI
core attempts to decode _ADR for a device having a _HID too which
is strictly against the spec.  However, it overlooked the fact that
the adev->pnp.ids list for the devices taken into account by
find_child_checks() may contain device IDs set internally by the
kernel, like "LNXVIDEO" (thanks to Zhang Rui for that realization),
and it broke the enumeration of those devices as a result.

To unbreak it, replace the overly coarse grained list_empty()
check with a much more precise check against the pnp.type.platform_id
flag which is only set for devices having a _HID (that's how it
should be done from the start, as having both _ADR and _CID is
actually permitted).

Fixes: c2a6bbaf0c5f (ACPI / scan: Prefer devices without _HID/_CID for _ADR matching)
Link: https://bugzilla.kernel.org/show_bug.cgi?id=194889
Reported-and-tested-by: Mike <mike@mikewilson.me.uk>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Cc: 4.10+ <stable@vger.kernel.org> # 4.10+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/glue.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index fb19e1cdb641..edc8663b5db3 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -99,13 +99,13 @@ static int find_child_checks(struct acpi_device *adev, bool check_children)
 		return -ENODEV;
 
 	/*
-	 * If the device has a _HID (or _CID) returning a valid ACPI/PNP
-	 * device ID, it is better to make it look less attractive here, so that
-	 * the other device with the same _ADR value (that may not have a valid
-	 * device ID) can be matched going forward.  [This means a second spec
-	 * violation in a row, so whatever we do here is best effort anyway.]
+	 * If the device has a _HID returning a valid ACPI/PNP device ID, it is
+	 * better to make it look less attractive here, so that the other device
+	 * with the same _ADR value (that may not have a valid device ID) can be
+	 * matched going forward.  [This means a second spec violation in a row,
+	 * so whatever we do here is best effort anyway.]
 	 */
-	return sta_present && list_empty(&adev->pnp.ids) ?
+	return sta_present && !adev->pnp.type.platform_id ?
 			FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE;
 }
 

From e640cc306388b6f9dc8109d5c5d0550d7e69e5f7 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Fri, 31 Mar 2017 15:58:40 -0700
Subject: [PATCH 148/410] xtensa: fix stack dump output

Use %pB in pr_cont format string instead of calling print_symbol
separately. It turns

  [   19.166249] Call Trace:
  [   19.167265]  [<a000e50a>]
  [   19.167843]  __warn+0x92/0xa0
  [   19.169656]  [<a000e554>]
  [   19.170059]  warn_slowpath_fmt+0x3c/0x40
  [   19.171934]  [<a02d5bd8>]

back into

  [   18.123240] Call Trace:
  [   18.125039]  [<a000e4f6>] __warn+0x92/0xa0
  [   18.126961]  [<a000e540>] warn_slowpath_fmt+0x3c/0x40

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/kernel/traps.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c
index c82c43bff296..bae697a06a98 100644
--- a/arch/xtensa/kernel/traps.c
+++ b/arch/xtensa/kernel/traps.c
@@ -483,10 +483,8 @@ void show_regs(struct pt_regs * regs)
 
 static int show_trace_cb(struct stackframe *frame, void *data)
 {
-	if (kernel_text_address(frame->pc)) {
-		pr_cont(" [<%08lx>]", frame->pc);
-		print_symbol(" %s\n", frame->pc);
-	}
+	if (kernel_text_address(frame->pc))
+		pr_cont(" [<%08lx>] %pB\n", frame->pc, (void *)frame->pc);
 	return 0;
 }
 

From 1493aa65ad076293722908f03bab3d4bf3dc3638 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Fri, 31 Mar 2017 16:26:21 -0700
Subject: [PATCH 149/410] xtensa: wire up statx system call

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/include/uapi/asm/unistd.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h
index cd400af4a6b2..6be7eb27fd29 100644
--- a/arch/xtensa/include/uapi/asm/unistd.h
+++ b/arch/xtensa/include/uapi/asm/unistd.h
@@ -774,7 +774,10 @@ __SYSCALL(349, sys_pkey_alloc, 2)
 #define __NR_pkey_free				350
 __SYSCALL(350, sys_pkey_free, 1)
 
-#define __NR_syscall_count			351
+#define __NR_statx				351
+__SYSCALL(351, sys_statx, 5)
+
+#define __NR_syscall_count			352
 
 /*
  * sysxtensa syscall handler

From aa4ce4493c88dc324911152d1ccd25469366dba3 Mon Sep 17 00:00:00 2001
From: Zhi Wang <zhi.a.wang@intel.com>
Date: Sat, 1 Apr 2017 00:00:53 +0800
Subject: [PATCH 150/410] drm/i915/gvt: Fix firmware loading interface for
 GVT-g golden HW state

Firmware loading interface for GVT-g golden HW state has been broken
before. This patch fixes GVT-g firmware loading interface. A user should
apply this patch if he wants to load GVT-g golden HW state from firmware
interface.

Fixes: 579cea5 ("drm/i915/gvt: golden virtual HW state management")
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: drm-intel-fixes@lists.freedesktop.org
Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/firmware.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c
index 933a7c211a1c..dce8d15f706f 100644
--- a/drivers/gpu/drm/i915/gvt/firmware.c
+++ b/drivers/gpu/drm/i915/gvt/firmware.c
@@ -75,11 +75,11 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt)
 	struct gvt_firmware_header *h;
 	void *firmware;
 	void *p;
-	unsigned long size;
+	unsigned long size, crc32_start;
 	int i;
 	int ret;
 
-	size = sizeof(*h) + info->mmio_size + info->cfg_space_size - 1;
+	size = sizeof(*h) + info->mmio_size + info->cfg_space_size;
 	firmware = vzalloc(size);
 	if (!firmware)
 		return -ENOMEM;
@@ -112,6 +112,9 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt)
 
 	memcpy(gvt->firmware.mmio, p, info->mmio_size);
 
+	crc32_start = offsetof(struct gvt_firmware_header, crc32) + 4;
+	h->crc32 = crc32_le(0, firmware + crc32_start, size - crc32_start);
+
 	firmware_attr.size = size;
 	firmware_attr.private = firmware;
 
@@ -234,7 +237,7 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt)
 
 	firmware->mmio = mem;
 
-	sprintf(path, "%s/vid_0x%04x_did_0x%04x_rid_0x%04x.golden_hw_state",
+	sprintf(path, "%s/vid_0x%04x_did_0x%04x_rid_0x%02x.golden_hw_state",
 		 GVT_FIRMWARE_PATH, pdev->vendor, pdev->device,
 		 pdev->revision);
 

From b917078c1c107ce34264af893e436e6115eeb9f6 Mon Sep 17 00:00:00 2001
From: Daode Huang <huangdaode@hisilicon.com>
Date: Thu, 30 Mar 2017 16:37:41 +0100
Subject: [PATCH 151/410] net: hns: Add ACPI support to check SFP present

The current code only supports DT to check SFP present.
This patch adds ACPI support as well.

Signed-off-by: Daode Huang <huangdaode@hisilicon.com>
Reviewed-by: Yisen Zhuang <yisen.zhuang@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 11 +++++---
 .../ethernet/hisilicon/hns/hns_dsaf_misc.c    | 28 ++++++++++++++++++-
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 3239d27143b9..bdd8cdd732fb 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -82,9 +82,12 @@ void hns_mac_get_link_status(struct hns_mac_cb *mac_cb, u32 *link_status)
 	else
 		*link_status = 0;
 
-	ret = mac_cb->dsaf_dev->misc_op->get_sfp_prsnt(mac_cb, &sfp_prsnt);
-	if (!ret)
-		*link_status = *link_status && sfp_prsnt;
+	if (mac_cb->media_type == HNAE_MEDIA_TYPE_FIBER) {
+		ret = mac_cb->dsaf_dev->misc_op->get_sfp_prsnt(mac_cb,
+							       &sfp_prsnt);
+		if (!ret)
+			*link_status = *link_status && sfp_prsnt;
+	}
 
 	mac_cb->link = *link_status;
 }
@@ -855,7 +858,7 @@ static int  hns_mac_get_info(struct hns_mac_cb *mac_cb)
 		of_node_put(np);
 
 		np = of_parse_phandle(to_of_node(mac_cb->fw_port),
-					"serdes-syscon", 0);
+				      "serdes-syscon", 0);
 		syscon = syscon_node_to_regmap(np);
 		of_node_put(np);
 		if (IS_ERR_OR_NULL(syscon)) {
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index a2c22d084ce9..e13aa064a8e9 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -461,6 +461,32 @@ int hns_mac_get_sfp_prsnt(struct hns_mac_cb *mac_cb, int *sfp_prsnt)
 	return 0;
 }
 
+int hns_mac_get_sfp_prsnt_acpi(struct hns_mac_cb *mac_cb, int *sfp_prsnt)
+{
+	union acpi_object *obj;
+	union acpi_object obj_args, argv4;
+
+	obj_args.integer.type = ACPI_TYPE_INTEGER;
+	obj_args.integer.value = mac_cb->mac_id;
+
+	argv4.type = ACPI_TYPE_PACKAGE,
+	argv4.package.count = 1,
+	argv4.package.elements = &obj_args,
+
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev),
+				hns_dsaf_acpi_dsm_uuid, 0,
+				HNS_OP_GET_SFP_STAT_FUNC, &argv4);
+
+	if (!obj || obj->type != ACPI_TYPE_INTEGER)
+		return -ENODEV;
+
+	*sfp_prsnt = obj->integer.value;
+
+	ACPI_FREE(obj);
+
+	return 0;
+}
+
 /**
  * hns_mac_config_sds_loopback - set loop back for serdes
  * @mac_cb: mac control block
@@ -592,7 +618,7 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev)
 		misc_op->hns_dsaf_roce_srst = hns_dsaf_roce_srst_acpi;
 
 		misc_op->get_phy_if = hns_mac_get_phy_if_acpi;
-		misc_op->get_sfp_prsnt = hns_mac_get_sfp_prsnt;
+		misc_op->get_sfp_prsnt = hns_mac_get_sfp_prsnt_acpi;
 
 		misc_op->cfg_serdes_loopback = hns_mac_config_sds_loopback_acpi;
 	} else {

From 3af887c38f1ae0db66c00c4ee2e8a0b1e99ffc29 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 30 Mar 2017 17:00:12 +0100
Subject: [PATCH 152/410] net/faraday: Explicitly include linux/of.h and
 linux/property.h

This driver uses interfaces from linux/of.h and linux/property.h but
relies on implict inclusion of those headers which means that changes in
other headers could break the build, as happened in -next for arm today.
Add a explicit includes.

Signed-off-by: Mark Brown <broonie@kernel.org>
Acked-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/faraday/ftgmac100.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 928b0df2b8e0..ade6b3e4ed13 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -28,8 +28,10 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
+#include <linux/of.h>
 #include <linux/phy.h>
 #include <linux/platform_device.h>
+#include <linux/property.h>
 #include <net/ip.h>
 #include <net/ncsi.h>
 

From 6f56f6186c18e3fd54122b73da68e870687b8c59 Mon Sep 17 00:00:00 2001
From: Yi-Hung Wei <yihung.wei@gmail.com>
Date: Thu, 30 Mar 2017 12:36:03 -0700
Subject: [PATCH 153/410] openvswitch: Fix ovs_flow_key_update()

ovs_flow_key_update() is called when the flow key is invalid, and it is
used to update and revalidate the flow key. Commit 329f45bc4f19
("openvswitch: add mac_proto field to the flow key") introduces mac_proto
field to flow key and use it to determine whether the flow key is valid.
However, the commit does not update the code path in ovs_flow_key_update()
to revalidate the flow key which may cause BUG_ON() on execute_recirc().
This patch addresses the aforementioned issue.

Fixes: 329f45bc4f19 ("openvswitch: add mac_proto field to the flow key")
Signed-off-by: Yi-Hung Wei <yihung.wei@gmail.com>
Acked-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/flow.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 9d4bb8eb63f2..3f76cb765e5b 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -527,7 +527,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 
 	/* Link layer. */
 	clear_vlan(key);
-	if (key->mac_proto == MAC_PROTO_NONE) {
+	if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
 		if (unlikely(eth_type_vlan(skb->protocol)))
 			return -EINVAL;
 
@@ -745,7 +745,13 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 
 int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
 {
-	return key_extract(skb, key);
+	int res;
+
+	res = key_extract(skb, key);
+	if (!res)
+		key->mac_proto &= ~SW_FLOW_KEY_INVALID;
+
+	return res;
 }
 
 static int key_extract_mac_proto(struct sk_buff *skb)

From d5b07ccc1bf5fd2ccc6bf9da5677fc448a972e32 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Rebe?= <rene@exactcode.com>
Date: Tue, 28 Mar 2017 07:56:51 +0200
Subject: [PATCH 154/410] r8152: The Microsoft Surface docks also use R8152 v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Without this the generic cdc_ether grabs the device,
and does not really work.

Signed-off-by: René Rebe <rene@exactcode.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ether.c | 15 +++++++++++++++
 drivers/net/usb/r8152.c     |  3 +++
 2 files changed, 18 insertions(+)

diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index f5552aaaa77a..f3ae88fdf332 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -532,6 +532,7 @@ static const struct driver_info wwan_info = {
 #define LENOVO_VENDOR_ID	0x17ef
 #define NVIDIA_VENDOR_ID	0x0955
 #define HP_VENDOR_ID		0x03f0
+#define MICROSOFT_VENDOR_ID	0x045e
 
 static const struct usb_device_id	products[] = {
 /* BLACKLIST !!
@@ -761,6 +762,20 @@ static const struct usb_device_id	products[] = {
 	.driver_info = 0,
 },
 
+/* Microsoft Surface 2 dock (based on Realtek RTL8152) */
+{
+	USB_DEVICE_AND_INTERFACE_INFO(MICROSOFT_VENDOR_ID, 0x07ab, USB_CLASS_COMM,
+			USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
+	.driver_info = 0,
+},
+
+/* Microsoft Surface 3 dock (based on Realtek RTL8153) */
+{
+	USB_DEVICE_AND_INTERFACE_INFO(MICROSOFT_VENDOR_ID, 0x07c6, USB_CLASS_COMM,
+			USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
+	.driver_info = 0,
+},
+
 /* WHITELIST!!!
  *
  * CDC Ether uses two interfaces, not necessarily consecutive.
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index c34df33c6d72..07f788c49d57 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -517,6 +517,7 @@ enum rtl8152_flags {
 
 /* Define these values to match your device */
 #define VENDOR_ID_REALTEK		0x0bda
+#define VENDOR_ID_MICROSOFT		0x045e
 #define VENDOR_ID_SAMSUNG		0x04e8
 #define VENDOR_ID_LENOVO		0x17ef
 #define VENDOR_ID_NVIDIA		0x0955
@@ -4521,6 +4522,8 @@ static void rtl8152_disconnect(struct usb_interface *intf)
 static struct usb_device_id rtl8152_table[] = {
 	{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8152)},
 	{REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153)},
+	{REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07ab)},
+	{REALTEK_USB_DEVICE(VENDOR_ID_MICROSOFT, 0x07c6)},
 	{REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)},
 	{REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x304f)},
 	{REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x3062)},

From fce366a9dd0ddc47e7ce05611c266e8574a45116 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 31 Mar 2017 02:24:02 +0200
Subject: [PATCH 155/410] bpf, verifier: fix alu ops against map_value{, _adj}
 register types

While looking into map_value_adj, I noticed that alu operations
directly on the map_value() resp. map_value_adj() register (any
alu operation on a map_value() register will turn it into a
map_value_adj() typed register) are not sufficiently protected
against some of the operations. Two non-exhaustive examples are
provided that the verifier needs to reject:

 i) BPF_AND on r0 (map_value_adj):

  0: (bf) r2 = r10
  1: (07) r2 += -8
  2: (7a) *(u64 *)(r2 +0) = 0
  3: (18) r1 = 0xbf842a00
  5: (85) call bpf_map_lookup_elem#1
  6: (15) if r0 == 0x0 goto pc+2
   R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp
  7: (57) r0 &= 8
  8: (7a) *(u64 *)(r0 +0) = 22
   R0=map_value_adj(ks=8,vs=48,id=0),min_value=0,max_value=8 R10=fp
  9: (95) exit

  from 6 to 9: R0=inv,min_value=0,max_value=0 R10=fp
  9: (95) exit
  processed 10 insns

ii) BPF_ADD in 32 bit mode on r0 (map_value_adj):

  0: (bf) r2 = r10
  1: (07) r2 += -8
  2: (7a) *(u64 *)(r2 +0) = 0
  3: (18) r1 = 0xc24eee00
  5: (85) call bpf_map_lookup_elem#1
  6: (15) if r0 == 0x0 goto pc+2
   R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp
  7: (04) (u32) r0 += (u32) 0
  8: (7a) *(u64 *)(r0 +0) = 22
   R0=map_value_adj(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp
  9: (95) exit

  from 6 to 9: R0=inv,min_value=0,max_value=0 R10=fp
  9: (95) exit
  processed 10 insns

Issue is, while min_value / max_value boundaries for the access
are adjusted appropriately, we change the pointer value in a way
that cannot be sufficiently tracked anymore from its origin.
Operations like BPF_{AND,OR,DIV,MUL,etc} on a destination register
that is PTR_TO_MAP_VALUE{,_ADJ} was probably unintended, in fact,
all the test cases coming with 484611357c19 ("bpf: allow access
into map value arrays") perform BPF_ADD only on the destination
register that is PTR_TO_MAP_VALUE_ADJ.

Only for UNKNOWN_VALUE register types such operations make sense,
f.e. with unknown memory content fetched initially from a constant
offset from the map value memory into a register. That register is
then later tested against lower / upper bounds, so that the verifier
can then do the tracking of min_value / max_value, and properly
check once that UNKNOWN_VALUE register is added to the destination
register with type PTR_TO_MAP_VALUE{,_ADJ}. This is also what the
original use-case is solving. Note, tracking on what is being
added is done through adjust_reg_min_max_vals() and later access
to the map value enforced with these boundaries and the given offset
from the insn through check_map_access_adj().

Tests will fail for non-root environment due to prohibited pointer
arithmetic, in particular in check_alu_op(), we bail out on the
is_pointer_value() check on the dst_reg (which is false in root
case as we allow for pointer arithmetic via env->allow_ptr_leaks).

Similarly to PTR_TO_PACKET, one way to fix it is to restrict the
allowed operations on PTR_TO_MAP_VALUE{,_ADJ} registers to 64 bit
mode BPF_ADD. The test_verifier suite runs fine after the patch
and it also rejects mentioned test cases.

Fixes: 484611357c19 ("bpf: allow access into map value arrays")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5e6202e62265..86deddecff25 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1925,6 +1925,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		 * register as unknown.
 		 */
 		if (env->allow_ptr_leaks &&
+		    BPF_CLASS(insn->code) == BPF_ALU64 && opcode == BPF_ADD &&
 		    (dst_reg->type == PTR_TO_MAP_VALUE ||
 		     dst_reg->type == PTR_TO_MAP_VALUE_ADJ))
 			dst_reg->type = PTR_TO_MAP_VALUE_ADJ;

From 79adffcd6489ef43bda2dfded3d637d7fb4fac80 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 31 Mar 2017 02:24:03 +0200
Subject: [PATCH 156/410] bpf, verifier: fix rejection of unaligned access
 checks for map_value_adj

Currently, the verifier doesn't reject unaligned access for map_value_adj
register types. Commit 484611357c19 ("bpf: allow access into map value
arrays") added logic to check_ptr_alignment() extending it from PTR_TO_PACKET
to also PTR_TO_MAP_VALUE_ADJ, but for PTR_TO_MAP_VALUE_ADJ no enforcement
is in place, because reg->id for PTR_TO_MAP_VALUE_ADJ reg types is never
non-zero, meaning, we can cause BPF_H/_W/_DW-based unaligned access for
architectures not supporting efficient unaligned access, and thus worst
case could raise exceptions on some archs that are unable to correct the
unaligned access or perform a different memory access to the actual
requested one and such.

i) Unaligned load with !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
   on r0 (map_value_adj):

   0: (bf) r2 = r10
   1: (07) r2 += -8
   2: (7a) *(u64 *)(r2 +0) = 0
   3: (18) r1 = 0x42533a00
   5: (85) call bpf_map_lookup_elem#1
   6: (15) if r0 == 0x0 goto pc+11
    R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp
   7: (61) r1 = *(u32 *)(r0 +0)
   8: (35) if r1 >= 0xb goto pc+9
    R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R1=inv,min_value=0,max_value=10 R10=fp
   9: (07) r0 += 3
  10: (79) r7 = *(u64 *)(r0 +0)
    R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R1=inv,min_value=0,max_value=10 R10=fp
  11: (79) r7 = *(u64 *)(r0 +2)
    R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R1=inv,min_value=0,max_value=10 R7=inv R10=fp
  [...]

ii) Unaligned store with !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
    on r0 (map_value_adj):

   0: (bf) r2 = r10
   1: (07) r2 += -8
   2: (7a) *(u64 *)(r2 +0) = 0
   3: (18) r1 = 0x4df16a00
   5: (85) call bpf_map_lookup_elem#1
   6: (15) if r0 == 0x0 goto pc+19
    R0=map_value(ks=8,vs=48,id=0),min_value=0,max_value=0 R10=fp
   7: (07) r0 += 3
   8: (7a) *(u64 *)(r0 +0) = 42
    R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R10=fp
   9: (7a) *(u64 *)(r0 +2) = 43
    R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R10=fp
  10: (7a) *(u64 *)(r0 -2) = 44
    R0=map_value_adj(ks=8,vs=48,id=0),min_value=3,max_value=3 R10=fp
  [...]

For the PTR_TO_PACKET type, reg->id is initially zero when skb->data
was fetched, it later receives a reg->id from env->id_gen generator
once another register with UNKNOWN_VALUE type was added to it via
check_packet_ptr_add(). The purpose of this reg->id is twofold: i) it
is used in find_good_pkt_pointers() for setting the allowed access
range for regs with PTR_TO_PACKET of same id once verifier matched
on data/data_end tests, and ii) for check_ptr_alignment() to determine
that when not having efficient unaligned access and register with
UNKNOWN_VALUE was added to PTR_TO_PACKET, that we're only allowed
to access the content bytewise due to unknown unalignment. reg->id
was never intended for PTR_TO_MAP_VALUE{,_ADJ} types and thus is
always zero, the only marking is in PTR_TO_MAP_VALUE_OR_NULL that
was added after 484611357c19 via 57a09bf0a416 ("bpf: Detect identical
PTR_TO_MAP_VALUE_OR_NULL registers"). Above tests will fail for
non-root environment due to prohibited pointer arithmetic.

The fix splits register-type specific checks into their own helper
instead of keeping them combined, so we don't run into a similar
issue in future once we extend check_ptr_alignment() further and
forget to add reg->type checks for some of the checks.

Fixes: 484611357c19 ("bpf: allow access into map value arrays")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c | 58 ++++++++++++++++++++++++++++---------------
 1 file changed, 38 insertions(+), 20 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 86deddecff25..a834068a400e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -765,38 +765,56 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
 	}
 }
 
-static int check_ptr_alignment(struct bpf_verifier_env *env,
-			       struct bpf_reg_state *reg, int off, int size)
+static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
+				   int off, int size)
 {
-	if (reg->type != PTR_TO_PACKET && reg->type != PTR_TO_MAP_VALUE_ADJ) {
-		if (off % size != 0) {
-			verbose("misaligned access off %d size %d\n",
-				off, size);
-			return -EACCES;
-		} else {
-			return 0;
-		}
-	}
-
-	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
-		/* misaligned access to packet is ok on x86,arm,arm64 */
-		return 0;
-
 	if (reg->id && size != 1) {
-		verbose("Unknown packet alignment. Only byte-sized access allowed\n");
+		verbose("Unknown alignment. Only byte-sized access allowed in packet access.\n");
 		return -EACCES;
 	}
 
 	/* skb->data is NET_IP_ALIGN-ed */
-	if (reg->type == PTR_TO_PACKET &&
-	    (NET_IP_ALIGN + reg->off + off) % size != 0) {
+	if ((NET_IP_ALIGN + reg->off + off) % size != 0) {
 		verbose("misaligned packet access off %d+%d+%d size %d\n",
 			NET_IP_ALIGN, reg->off, off, size);
 		return -EACCES;
 	}
+
 	return 0;
 }
 
+static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
+				   int size)
+{
+	if (size != 1) {
+		verbose("Unknown alignment. Only byte-sized access allowed in value access.\n");
+		return -EACCES;
+	}
+
+	return 0;
+}
+
+static int check_ptr_alignment(const struct bpf_reg_state *reg,
+			       int off, int size)
+{
+	switch (reg->type) {
+	case PTR_TO_PACKET:
+		return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
+		       check_pkt_ptr_alignment(reg, off, size);
+	case PTR_TO_MAP_VALUE_ADJ:
+		return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
+		       check_val_ptr_alignment(reg, size);
+	default:
+		if (off % size != 0) {
+			verbose("misaligned access off %d size %d\n",
+				off, size);
+			return -EACCES;
+		}
+
+		return 0;
+	}
+}
+
 /* check whether memory at (regno + off) is accessible for t = (read | write)
  * if t==write, value_regno is a register which value is stored into memory
  * if t==read, value_regno is a register which will receive the value from memory
@@ -818,7 +836,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
 	if (size < 0)
 		return size;
 
-	err = check_ptr_alignment(env, reg, off, size);
+	err = check_ptr_alignment(reg, off, size);
 	if (err)
 		return err;
 

From 02ea80b1850e48abbce77878896229d7cc5cb230 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 31 Mar 2017 02:24:04 +0200
Subject: [PATCH 157/410] bpf: add various verifier test cases for self-tests

Add a couple of test cases, for example, probing for xadd on a spilled
pointer to packet and map_value_adj register, various other map_value_adj
tests including the unaligned load/store, and trying out pointer arithmetic
on map_value_adj register itself. For the unaligned load/store, we need
to figure out whether the architecture has efficient unaligned access and
need to mark affected tests accordingly.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/include/linux/filter.h                |  10 +
 tools/testing/selftests/bpf/Makefile        |   9 +-
 tools/testing/selftests/bpf/test_verifier.c | 270 +++++++++++++++++++-
 3 files changed, 283 insertions(+), 6 deletions(-)

diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
index 122153b16ea4..390d7c9685fd 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -168,6 +168,16 @@
 		.off   = OFF,					\
 		.imm   = 0 })
 
+/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
+
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
 /* Memory store, *(uint *) (dst_reg + off16) = imm32 */
 
 #define BPF_ST_MEM(SIZE, DST, OFF, IMM)				\
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 6a1ad58cb66f..9af09e8099c0 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,7 +1,14 @@
 LIBDIR := ../../../lib
 BPFDIR := $(LIBDIR)/bpf
+APIDIR := ../../../include/uapi
+GENDIR := ../../../../include/generated
+GENHDR := $(GENDIR)/autoconf.h
 
-CFLAGS += -Wall -O2 -I../../../include/uapi -I$(LIBDIR)
+ifneq ($(wildcard $(GENHDR)),)
+  GENFLAGS := -DHAVE_GENHDR
+endif
+
+CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS)
 LDLIBS += -lcap
 
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 7d761d4cc759..c848e90b6421 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -30,6 +30,14 @@
 
 #include <bpf/bpf.h>
 
+#ifdef HAVE_GENHDR
+# include "autoconf.h"
+#else
+# if defined(__i386) || defined(__x86_64) || defined(__s390x__) || defined(__aarch64__)
+#  define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
+# endif
+#endif
+
 #include "../../../include/linux/filter.h"
 
 #ifndef ARRAY_SIZE
@@ -39,6 +47,8 @@
 #define MAX_INSNS	512
 #define MAX_FIXUPS	8
 
+#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS	(1 << 0)
+
 struct bpf_test {
 	const char *descr;
 	struct bpf_insn	insns[MAX_INSNS];
@@ -53,6 +63,7 @@ struct bpf_test {
 		REJECT
 	} result, result_unpriv;
 	enum bpf_prog_type prog_type;
+	uint8_t flags;
 };
 
 /* Note we want this to be 64 bit aligned so that the end of our array is
@@ -2431,6 +2442,30 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
+	{
+		"direct packet access: test15 (spill with xadd)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
+			BPF_MOV64_IMM(BPF_REG_5, 4096),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_STX_XADD(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_2, BPF_REG_5, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R2 invalid mem access 'inv'",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
 	{
 		"helper access to packet: test1, valid packet_ptr range",
 		.insns = {
@@ -2934,6 +2969,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
 	{
 		"valid map access into an array with a variable",
@@ -2957,6 +2993,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
 	{
 		"valid map access into an array with a signed variable",
@@ -2984,6 +3021,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
 	{
 		"invalid map access into an array with a constant",
@@ -3025,6 +3063,7 @@ static struct bpf_test tests[] = {
 		.errstr = "R0 min value is outside of the array range",
 		.result_unpriv = REJECT,
 		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
 	{
 		"invalid map access into an array with a variable",
@@ -3048,6 +3087,7 @@ static struct bpf_test tests[] = {
 		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
 		.result_unpriv = REJECT,
 		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
 	{
 		"invalid map access into an array with no floor check",
@@ -3074,6 +3114,7 @@ static struct bpf_test tests[] = {
 		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
 		.result_unpriv = REJECT,
 		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
 	{
 		"invalid map access into an array with a invalid max check",
@@ -3100,6 +3141,7 @@ static struct bpf_test tests[] = {
 		.errstr = "invalid access to map value, value_size=48 off=44 size=8",
 		.result_unpriv = REJECT,
 		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
 	{
 		"invalid map access into an array with a invalid max check",
@@ -3129,6 +3171,7 @@ static struct bpf_test tests[] = {
 		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
 		.result_unpriv = REJECT,
 		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
 	{
 		"multiple registers share map_lookup_elem result",
@@ -3252,6 +3295,7 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.result_unpriv = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
 	{
 		"constant register |= constant should keep constant type",
@@ -3981,7 +4025,208 @@ static struct bpf_test tests[] = {
 		.result_unpriv = REJECT,
 	},
 	{
-		"map element value (adjusted) is preserved across register spilling",
+		"map element value or null is marked on register spilling",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -152),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 leaks addr",
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"map element value store of cleared call register",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R1 !read_ok",
+		.errstr = "R1 !read_ok",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"map element value with unaligned store",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 17),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 43),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, -2, 44),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 32),
+			BPF_ST_MEM(BPF_DW, BPF_REG_8, 2, 33),
+			BPF_ST_MEM(BPF_DW, BPF_REG_8, -2, 34),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 5),
+			BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 22),
+			BPF_ST_MEM(BPF_DW, BPF_REG_8, 4, 23),
+			BPF_ST_MEM(BPF_DW, BPF_REG_8, -7, 24),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_8),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 3),
+			BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 22),
+			BPF_ST_MEM(BPF_DW, BPF_REG_7, 4, 23),
+			BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"map element value with unaligned load",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 9),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 2),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 5),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"map element value illegal alu op, 1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "invalid mem access 'inv'",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"map element value illegal alu op, 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "invalid mem access 'inv'",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"map element value illegal alu op, 3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, 42),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "invalid mem access 'inv'",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"map element value illegal alu op, 4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ENDIAN(BPF_FROM_BE, BPF_REG_0, 64),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "invalid mem access 'inv'",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"map element value illegal alu op, 5",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_MOV64_IMM(BPF_REG_3, 4096),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+			BPF_STX_XADD(BPF_DW, BPF_REG_2, BPF_REG_3, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 invalid mem access 'inv'",
+		.errstr = "R0 invalid mem access 'inv'",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"map element value is preserved across register spilling",
 		.insns = {
 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
@@ -4003,6 +4248,7 @@ static struct bpf_test tests[] = {
 		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
 	{
 		"helper access to variable memory: stack, bitwise AND + JMP, correct bounds",
@@ -4441,6 +4687,7 @@ static struct bpf_test tests[] = {
 		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
 		.result = REJECT,
 		.result_unpriv = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
 	{
 		"invalid range check",
@@ -4472,6 +4719,7 @@ static struct bpf_test tests[] = {
 		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
 		.result = REJECT,
 		.result_unpriv = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	}
 };
 
@@ -4550,11 +4798,11 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
 static void do_test_single(struct bpf_test *test, bool unpriv,
 			   int *passes, int *errors)
 {
+	int fd_prog, expected_ret, reject_from_alignment;
 	struct bpf_insn *prog = test->insns;
 	int prog_len = probe_filter_length(prog);
 	int prog_type = test->prog_type;
 	int fd_f1 = -1, fd_f2 = -1, fd_f3 = -1;
-	int fd_prog, expected_ret;
 	const char *expected_err;
 
 	do_test_fixup(test, prog, &fd_f1, &fd_f2, &fd_f3);
@@ -4567,8 +4815,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		       test->result_unpriv : test->result;
 	expected_err = unpriv && test->errstr_unpriv ?
 		       test->errstr_unpriv : test->errstr;
+
+	reject_from_alignment = fd_prog < 0 &&
+				(test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) &&
+				strstr(bpf_vlog, "Unknown alignment.");
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (reject_from_alignment) {
+		printf("FAIL\nFailed due to alignment despite having efficient unaligned access: '%s'!\n",
+		       strerror(errno));
+		goto fail_log;
+	}
+#endif
 	if (expected_ret == ACCEPT) {
-		if (fd_prog < 0) {
+		if (fd_prog < 0 && !reject_from_alignment) {
 			printf("FAIL\nFailed to load prog '%s'!\n",
 			       strerror(errno));
 			goto fail_log;
@@ -4578,14 +4837,15 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 			printf("FAIL\nUnexpected success to load!\n");
 			goto fail_log;
 		}
-		if (!strstr(bpf_vlog, expected_err)) {
+		if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) {
 			printf("FAIL\nUnexpected error message!\n");
 			goto fail_log;
 		}
 	}
 
 	(*passes)++;
-	printf("OK\n");
+	printf("OK%s\n", reject_from_alignment ?
+	       " (NOTE: reject due to unknown alignment)" : "");
 close_fds:
 	close(fd_prog);
 	close(fd_f1);

From afe89962ee0799955b606cc7637ac86a296923a6 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 31 Mar 2017 17:57:28 +0800
Subject: [PATCH 158/410] sctp: use right in and out stream cnt

Since sctp reconf was added in sctp, the real cnt of in/out stream
have not been c.sinit_max_instreams and c.sinit_num_ostreams any
more.

This patch is to replace them with stream->in/outcnt.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/outqueue.c     |  3 +--
 net/sctp/proc.c         |  4 ++--
 net/sctp/sm_statefuns.c |  6 +++---
 net/sctp/socket.c       | 10 +++++-----
 4 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 025ccff67072..8081476ed313 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1026,8 +1026,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
 			/* RFC 2960 6.5 Every DATA chunk MUST carry a valid
 			 * stream identifier.
 			 */
-			if (chunk->sinfo.sinfo_stream >=
-			    asoc->c.sinit_num_ostreams) {
+			if (chunk->sinfo.sinfo_stream >= asoc->stream->outcnt) {
 
 				/* Mark as failed send. */
 				sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 206377fe91ec..a0b29d43627f 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -361,8 +361,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
 	sctp_seq_dump_remote_addrs(seq, assoc);
 	seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d "
 		   "%8d %8d %8d %8d",
-		assoc->hbinterval, assoc->c.sinit_max_instreams,
-		assoc->c.sinit_num_ostreams, assoc->max_retrans,
+		assoc->hbinterval, assoc->stream->incnt,
+		assoc->stream->outcnt, assoc->max_retrans,
 		assoc->init_retries, assoc->shutdown_retries,
 		assoc->rtx_data_chunks,
 		atomic_read(&sk->sk_wmem_alloc),
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index e03bb1aab4d0..24c6ccce7539 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3946,7 +3946,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net,
 
 	/* Silently discard the chunk if stream-id is not valid */
 	sctp_walk_fwdtsn(skip, chunk) {
-		if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams)
+		if (ntohs(skip->stream) >= asoc->stream->incnt)
 			goto discard_noforce;
 	}
 
@@ -4017,7 +4017,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast(
 
 	/* Silently discard the chunk if stream-id is not valid */
 	sctp_walk_fwdtsn(skip, chunk) {
-		if (ntohs(skip->stream) >= asoc->c.sinit_max_instreams)
+		if (ntohs(skip->stream) >= asoc->stream->incnt)
 			goto gen_shutdown;
 	}
 
@@ -6353,7 +6353,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
 	 * and discard the DATA chunk.
 	 */
 	sid = ntohs(data_hdr->stream);
-	if (sid >= asoc->c.sinit_max_instreams) {
+	if (sid >= asoc->stream->incnt) {
 		/* Mark tsn as received even though we drop it */
 		sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_TSN, SCTP_U32(tsn));
 
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index baa269a0d52e..12fbae2c1002 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1920,7 +1920,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 	}
 
 	/* Check for invalid stream. */
-	if (sinfo->sinfo_stream >= asoc->c.sinit_num_ostreams) {
+	if (sinfo->sinfo_stream >= asoc->stream->outcnt) {
 		err = -EINVAL;
 		goto out_free;
 	}
@@ -4461,8 +4461,8 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
 	info->sctpi_rwnd = asoc->a_rwnd;
 	info->sctpi_unackdata = asoc->unack_data;
 	info->sctpi_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map);
-	info->sctpi_instrms = asoc->c.sinit_max_instreams;
-	info->sctpi_outstrms = asoc->c.sinit_num_ostreams;
+	info->sctpi_instrms = asoc->stream->incnt;
+	info->sctpi_outstrms = asoc->stream->outcnt;
 	list_for_each(pos, &asoc->base.inqueue.in_chunk_list)
 		info->sctpi_inqueue++;
 	list_for_each(pos, &asoc->outqueue.out_chunk_list)
@@ -4691,8 +4691,8 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
 	status.sstat_unackdata = asoc->unack_data;
 
 	status.sstat_penddata = sctp_tsnmap_pending(&asoc->peer.tsn_map);
-	status.sstat_instrms = asoc->c.sinit_max_instreams;
-	status.sstat_outstrms = asoc->c.sinit_num_ostreams;
+	status.sstat_instrms = asoc->stream->incnt;
+	status.sstat_outstrms = asoc->stream->outcnt;
 	status.sstat_fragmentation_point = asoc->frag_point;
 	status.sstat_primary.spinfo_assoc_id = sctp_assoc2id(transport->asoc);
 	memcpy(&status.sstat_primary.spinfo_address, &transport->ipaddr,

From 61b9a047729bb230978178bca6729689d0c50ca2 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Fri, 31 Mar 2017 13:02:25 +0200
Subject: [PATCH 159/410] l2tp: fix race in l2tp_recv_common()

Taking a reference on sessions in l2tp_recv_common() is racy; this
has to be done by the callers.

To this end, a new function is required (l2tp_session_get()) to
atomically lookup a session and take a reference on it. Callers then
have to manually drop this reference.

Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 73 ++++++++++++++++++++++++++++++++++++--------
 net/l2tp/l2tp_core.h |  3 ++
 net/l2tp/l2tp_ip.c   | 17 ++++++++---
 net/l2tp/l2tp_ip6.c  | 18 ++++++++---
 4 files changed, 88 insertions(+), 23 deletions(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 8adab6335ced..8a067536d15c 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -278,6 +278,55 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn
 }
 EXPORT_SYMBOL_GPL(l2tp_session_find);
 
+/* Like l2tp_session_find() but takes a reference on the returned session.
+ * Optionally calls session->ref() too if do_ref is true.
+ */
+struct l2tp_session *l2tp_session_get(struct net *net,
+				      struct l2tp_tunnel *tunnel,
+				      u32 session_id, bool do_ref)
+{
+	struct hlist_head *session_list;
+	struct l2tp_session *session;
+
+	if (!tunnel) {
+		struct l2tp_net *pn = l2tp_pernet(net);
+
+		session_list = l2tp_session_id_hash_2(pn, session_id);
+
+		rcu_read_lock_bh();
+		hlist_for_each_entry_rcu(session, session_list, global_hlist) {
+			if (session->session_id == session_id) {
+				l2tp_session_inc_refcount(session);
+				if (do_ref && session->ref)
+					session->ref(session);
+				rcu_read_unlock_bh();
+
+				return session;
+			}
+		}
+		rcu_read_unlock_bh();
+
+		return NULL;
+	}
+
+	session_list = l2tp_session_id_hash(tunnel, session_id);
+	read_lock_bh(&tunnel->hlist_lock);
+	hlist_for_each_entry(session, session_list, hlist) {
+		if (session->session_id == session_id) {
+			l2tp_session_inc_refcount(session);
+			if (do_ref && session->ref)
+				session->ref(session);
+			read_unlock_bh(&tunnel->hlist_lock);
+
+			return session;
+		}
+	}
+	read_unlock_bh(&tunnel->hlist_lock);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_get);
+
 struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
 {
 	int hash;
@@ -633,6 +682,9 @@ discard:
  * a data (not control) frame before coming here. Fields up to the
  * session-id have already been parsed and ptr points to the data
  * after the session-id.
+ *
+ * session->ref() must have been called prior to l2tp_recv_common().
+ * session->deref() will be called automatically after skb is processed.
  */
 void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 		      unsigned char *ptr, unsigned char *optr, u16 hdrflags,
@@ -642,14 +694,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 	int offset;
 	u32 ns, nr;
 
-	/* The ref count is increased since we now hold a pointer to
-	 * the session. Take care to decrement the refcnt when exiting
-	 * this function from now on...
-	 */
-	l2tp_session_inc_refcount(session);
-	if (session->ref)
-		(*session->ref)(session);
-
 	/* Parse and check optional cookie */
 	if (session->peer_cookie_len > 0) {
 		if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) {
@@ -802,8 +846,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 	/* Try to dequeue as many skbs from reorder_q as we can. */
 	l2tp_recv_dequeue(session);
 
-	l2tp_session_dec_refcount(session);
-
 	return;
 
 discard:
@@ -812,8 +854,6 @@ discard:
 
 	if (session->deref)
 		(*session->deref)(session);
-
-	l2tp_session_dec_refcount(session);
 }
 EXPORT_SYMBOL(l2tp_recv_common);
 
@@ -920,8 +960,14 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
 	}
 
 	/* Find the session context */
-	session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id);
+	session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id, true);
 	if (!session || !session->recv_skb) {
+		if (session) {
+			if (session->deref)
+				session->deref(session);
+			l2tp_session_dec_refcount(session);
+		}
+
 		/* Not found? Pass to userspace to deal with */
 		l2tp_info(tunnel, L2TP_MSG_DATA,
 			  "%s: no session found (%u/%u). Passing up.\n",
@@ -930,6 +976,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
 	}
 
 	l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook);
+	l2tp_session_dec_refcount(session);
 
 	return 0;
 
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index aebf281d09ee..4544e81a3d27 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -230,6 +230,9 @@ out:
 	return tunnel;
 }
 
+struct l2tp_session *l2tp_session_get(struct net *net,
+				      struct l2tp_tunnel *tunnel,
+				      u32 session_id, bool do_ref);
 struct l2tp_session *l2tp_session_find(struct net *net,
 				       struct l2tp_tunnel *tunnel,
 				       u32 session_id);
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 7208fbe5856b..4d322c1b7233 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -143,19 +143,19 @@ static int l2tp_ip_recv(struct sk_buff *skb)
 	}
 
 	/* Ok, this is a data packet. Lookup the session. */
-	session = l2tp_session_find(net, NULL, session_id);
-	if (session == NULL)
+	session = l2tp_session_get(net, NULL, session_id, true);
+	if (!session)
 		goto discard;
 
 	tunnel = session->tunnel;
-	if (tunnel == NULL)
-		goto discard;
+	if (!tunnel)
+		goto discard_sess;
 
 	/* Trace packet contents, if enabled */
 	if (tunnel->debug & L2TP_MSG_DATA) {
 		length = min(32u, skb->len);
 		if (!pskb_may_pull(skb, length))
-			goto discard;
+			goto discard_sess;
 
 		/* Point to L2TP header */
 		optr = ptr = skb->data;
@@ -165,6 +165,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
 	}
 
 	l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook);
+	l2tp_session_dec_refcount(session);
 
 	return 0;
 
@@ -203,6 +204,12 @@ pass_up:
 
 	return sk_receive_skb(sk, skb, 1);
 
+discard_sess:
+	if (session->deref)
+		session->deref(session);
+	l2tp_session_dec_refcount(session);
+	goto discard;
+
 discard_put:
 	sock_put(sk);
 
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 516d7ce24ba7..88b397c30d86 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -156,19 +156,19 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
 	}
 
 	/* Ok, this is a data packet. Lookup the session. */
-	session = l2tp_session_find(net, NULL, session_id);
-	if (session == NULL)
+	session = l2tp_session_get(net, NULL, session_id, true);
+	if (!session)
 		goto discard;
 
 	tunnel = session->tunnel;
-	if (tunnel == NULL)
-		goto discard;
+	if (!tunnel)
+		goto discard_sess;
 
 	/* Trace packet contents, if enabled */
 	if (tunnel->debug & L2TP_MSG_DATA) {
 		length = min(32u, skb->len);
 		if (!pskb_may_pull(skb, length))
-			goto discard;
+			goto discard_sess;
 
 		/* Point to L2TP header */
 		optr = ptr = skb->data;
@@ -179,6 +179,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
 
 	l2tp_recv_common(session, skb, ptr, optr, 0, skb->len,
 			 tunnel->recv_payload_hook);
+	l2tp_session_dec_refcount(session);
+
 	return 0;
 
 pass_up:
@@ -216,6 +218,12 @@ pass_up:
 
 	return sk_receive_skb(sk, skb, 1);
 
+discard_sess:
+	if (session->deref)
+		session->deref(session);
+	l2tp_session_dec_refcount(session);
+	goto discard;
+
 discard_put:
 	sock_put(sk);
 

From 57377d63547861919ee634b845c7caa38de4a452 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Fri, 31 Mar 2017 13:02:26 +0200
Subject: [PATCH 160/410] l2tp: ensure session can't get removed during
 pppol2tp_session_ioctl()

Holding a reference on session is required before calling
pppol2tp_session_ioctl(). The session could get freed while processing the
ioctl otherwise. Since pppol2tp_session_ioctl() uses the session's socket,
we also need to take a reference on it in l2tp_session_get().

Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ppp.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 123b6a2411a0..827e55c41ba2 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1141,11 +1141,18 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
 		if (stats.session_id != 0) {
 			/* resend to session ioctl handler */
 			struct l2tp_session *session =
-				l2tp_session_find(sock_net(sk), tunnel, stats.session_id);
-			if (session != NULL)
-				err = pppol2tp_session_ioctl(session, cmd, arg);
-			else
+				l2tp_session_get(sock_net(sk), tunnel,
+						 stats.session_id, true);
+
+			if (session) {
+				err = pppol2tp_session_ioctl(session, cmd,
+							     arg);
+				if (session->deref)
+					session->deref(session);
+				l2tp_session_dec_refcount(session);
+			} else {
 				err = -EBADR;
+			}
 			break;
 		}
 #ifdef CONFIG_XFRM

From dbdbc73b44782e22b3b4b6e8b51e7a3d245f3086 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Fri, 31 Mar 2017 13:02:27 +0200
Subject: [PATCH 161/410] l2tp: fix duplicate session creation

l2tp_session_create() relies on its caller for checking for duplicate
sessions. This is racy since a session can be concurrently inserted
after the caller's verification.

Fix this by letting l2tp_session_create() verify sessions uniqueness
upon insertion. Callers need to be adapted to check for
l2tp_session_create()'s return code instead of calling
l2tp_session_find().

pppol2tp_connect() is a bit special because it has to work on existing
sessions (if they're not connected) or to create a new session if none
is found. When acting on a preexisting session, a reference must be
held or it could go away on us. So we have to use l2tp_session_get()
instead of l2tp_session_find() and drop the reference before exiting.

Fixes: d9e31d17ceba ("l2tp: Add L2TP ethernet pseudowire support")
Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 70 +++++++++++++++++++++++++++++++++-----------
 net/l2tp/l2tp_eth.c  | 10 ++-----
 net/l2tp/l2tp_ppp.c  | 62 +++++++++++++++++++--------------------
 3 files changed, 85 insertions(+), 57 deletions(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 8a067536d15c..46b450a1bc21 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -374,6 +374,48 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
 }
 EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname);
 
+static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel,
+				      struct l2tp_session *session)
+{
+	struct l2tp_session *session_walk;
+	struct hlist_head *g_head;
+	struct hlist_head *head;
+	struct l2tp_net *pn;
+
+	head = l2tp_session_id_hash(tunnel, session->session_id);
+
+	write_lock_bh(&tunnel->hlist_lock);
+	hlist_for_each_entry(session_walk, head, hlist)
+		if (session_walk->session_id == session->session_id)
+			goto exist;
+
+	if (tunnel->version == L2TP_HDR_VER_3) {
+		pn = l2tp_pernet(tunnel->l2tp_net);
+		g_head = l2tp_session_id_hash_2(l2tp_pernet(tunnel->l2tp_net),
+						session->session_id);
+
+		spin_lock_bh(&pn->l2tp_session_hlist_lock);
+		hlist_for_each_entry(session_walk, g_head, global_hlist)
+			if (session_walk->session_id == session->session_id)
+				goto exist_glob;
+
+		hlist_add_head_rcu(&session->global_hlist, g_head);
+		spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+	}
+
+	hlist_add_head(&session->hlist, head);
+	write_unlock_bh(&tunnel->hlist_lock);
+
+	return 0;
+
+exist_glob:
+	spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+exist:
+	write_unlock_bh(&tunnel->hlist_lock);
+
+	return -EEXIST;
+}
+
 /* Lookup a tunnel by id
  */
 struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id)
@@ -1785,6 +1827,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
 struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
 {
 	struct l2tp_session *session;
+	int err;
 
 	session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL);
 	if (session != NULL) {
@@ -1840,6 +1883,13 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
 
 		l2tp_session_set_header_len(session, tunnel->version);
 
+		err = l2tp_session_add_to_tunnel(tunnel, session);
+		if (err) {
+			kfree(session);
+
+			return ERR_PTR(err);
+		}
+
 		/* Bump the reference count. The session context is deleted
 		 * only when this drops to zero.
 		 */
@@ -1849,28 +1899,14 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
 		/* Ensure tunnel socket isn't deleted */
 		sock_hold(tunnel->sock);
 
-		/* Add session to the tunnel's hash list */
-		write_lock_bh(&tunnel->hlist_lock);
-		hlist_add_head(&session->hlist,
-			       l2tp_session_id_hash(tunnel, session_id));
-		write_unlock_bh(&tunnel->hlist_lock);
-
-		/* And to the global session list if L2TPv3 */
-		if (tunnel->version != L2TP_HDR_VER_2) {
-			struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
-
-			spin_lock_bh(&pn->l2tp_session_hlist_lock);
-			hlist_add_head_rcu(&session->global_hlist,
-					   l2tp_session_id_hash_2(pn, session_id));
-			spin_unlock_bh(&pn->l2tp_session_hlist_lock);
-		}
-
 		/* Ignore management session in session count value */
 		if (session->session_id != 0)
 			atomic_inc(&l2tp_session_count);
+
+		return session;
 	}
 
-	return session;
+	return ERR_PTR(-ENOMEM);
 }
 EXPORT_SYMBOL_GPL(l2tp_session_create);
 
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 8bf18a5f66e0..6fd41d7afe1e 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -221,12 +221,6 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
 		goto out;
 	}
 
-	session = l2tp_session_find(net, tunnel, session_id);
-	if (session) {
-		rc = -EEXIST;
-		goto out;
-	}
-
 	if (cfg->ifname) {
 		dev = dev_get_by_name(net, cfg->ifname);
 		if (dev) {
@@ -240,8 +234,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
 
 	session = l2tp_session_create(sizeof(*spriv), tunnel, session_id,
 				      peer_session_id, cfg);
-	if (!session) {
-		rc = -ENOMEM;
+	if (IS_ERR(session)) {
+		rc = PTR_ERR(session);
 		goto out;
 	}
 
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 827e55c41ba2..26501902d1a7 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -583,6 +583,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	int error = 0;
 	u32 tunnel_id, peer_tunnel_id;
 	u32 session_id, peer_session_id;
+	bool drop_refcnt = false;
 	int ver = 2;
 	int fd;
 
@@ -684,36 +685,36 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	if (tunnel->peer_tunnel_id == 0)
 		tunnel->peer_tunnel_id = peer_tunnel_id;
 
-	/* Create session if it doesn't already exist. We handle the
-	 * case where a session was previously created by the netlink
-	 * interface by checking that the session doesn't already have
-	 * a socket and its tunnel socket are what we expect. If any
-	 * of those checks fail, return EEXIST to the caller.
-	 */
-	session = l2tp_session_find(sock_net(sk), tunnel, session_id);
-	if (session == NULL) {
-		/* Default MTU must allow space for UDP/L2TP/PPP
-		 * headers.
-		 */
-		cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+	session = l2tp_session_get(sock_net(sk), tunnel, session_id, false);
+	if (session) {
+		drop_refcnt = true;
+		ps = l2tp_session_priv(session);
 
-		/* Allocate and initialize a new session context. */
-		session = l2tp_session_create(sizeof(struct pppol2tp_session),
-					      tunnel, session_id,
-					      peer_session_id, &cfg);
-		if (session == NULL) {
-			error = -ENOMEM;
+		/* Using a pre-existing session is fine as long as it hasn't
+		 * been connected yet.
+		 */
+		if (ps->sock) {
+			error = -EEXIST;
+			goto end;
+		}
+
+		/* consistency checks */
+		if (ps->tunnel_sock != tunnel->sock) {
+			error = -EEXIST;
 			goto end;
 		}
 	} else {
-		ps = l2tp_session_priv(session);
-		error = -EEXIST;
-		if (ps->sock != NULL)
-			goto end;
+		/* Default MTU must allow space for UDP/L2TP/PPP headers */
+		cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+		cfg.mru = cfg.mtu;
 
-		/* consistency checks */
-		if (ps->tunnel_sock != tunnel->sock)
+		session = l2tp_session_create(sizeof(struct pppol2tp_session),
+					      tunnel, session_id,
+					      peer_session_id, &cfg);
+		if (IS_ERR(session)) {
+			error = PTR_ERR(session);
 			goto end;
+		}
 	}
 
 	/* Associate session with its PPPoL2TP socket */
@@ -778,6 +779,8 @@ out_no_ppp:
 		  session->name);
 
 end:
+	if (drop_refcnt)
+		l2tp_session_dec_refcount(session);
 	release_sock(sk);
 
 	return error;
@@ -805,12 +808,6 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i
 	if (tunnel->sock == NULL)
 		goto out;
 
-	/* Check that this session doesn't already exist */
-	error = -EEXIST;
-	session = l2tp_session_find(net, tunnel, session_id);
-	if (session != NULL)
-		goto out;
-
 	/* Default MTU values. */
 	if (cfg->mtu == 0)
 		cfg->mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
@@ -818,12 +815,13 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i
 		cfg->mru = cfg->mtu;
 
 	/* Allocate and initialize a new session context. */
-	error = -ENOMEM;
 	session = l2tp_session_create(sizeof(struct pppol2tp_session),
 				      tunnel, session_id,
 				      peer_session_id, cfg);
-	if (session == NULL)
+	if (IS_ERR(session)) {
+		error = PTR_ERR(session);
 		goto out;
+	}
 
 	ps = l2tp_session_priv(session);
 	ps->tunnel_sock = tunnel->sock;

From 5e6a9e5a3554a5b3db09cdc22253af1849c65dff Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Fri, 31 Mar 2017 13:02:29 +0200
Subject: [PATCH 162/410] l2tp: hold session while sending creation
 notifications

l2tp_session_find() doesn't take any reference on the returned session.
Therefore, the session may disappear while sending the notification.

Use l2tp_session_get() instead and decrement session's refcount once
the notification is sent.

Fixes: 33f72e6f0c67 ("l2tp : multicast notification to the registered listeners")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_netlink.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 3620fba31786..f1b68effb077 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -642,10 +642,12 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 			session_id, peer_session_id, &cfg);
 
 	if (ret >= 0) {
-		session = l2tp_session_find(net, tunnel, session_id);
-		if (session)
+		session = l2tp_session_get(net, tunnel, session_id, false);
+		if (session) {
 			ret = l2tp_session_notify(&l2tp_nl_family, info, session,
 						  L2TP_CMD_SESSION_CREATE);
+			l2tp_session_dec_refcount(session);
+		}
 	}
 
 out:

From 2777e2ab5a9cf2b4524486c6db1517a6ded25261 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Fri, 31 Mar 2017 13:02:30 +0200
Subject: [PATCH 163/410] l2tp: take a reference on sessions used in genetlink
 handlers

Callers of l2tp_nl_session_find() need to hold a reference on the
returned session since there's no guarantee that it isn't going to
disappear from under them.

Relying on the fact that no l2tp netlink message may be processed
concurrently isn't enough: sessions can be deleted by other means
(e.g. by closing the PPPOL2TP socket of a ppp pseudowire).

l2tp_nl_cmd_session_delete() is a bit special: it runs a callback
function that may require a previous call to session->ref(). In
particular, for ppp pseudowires, the callback is l2tp_session_delete(),
which then calls pppol2tp_session_close() and dereferences the PPPOL2TP
socket. The socket might already be gone at the moment
l2tp_session_delete() calls session->ref(), so we need to take a
reference during the session lookup. So we need to pass the do_ref
variable down to l2tp_session_get() and l2tp_session_get_by_ifname().

Since all callers have to be updated, l2tp_session_find_by_ifname() and
l2tp_nl_session_find() are renamed to reflect their new behaviour.

Fixes: 309795f4bec2 ("l2tp: Add netlink control API for L2TP")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c    |  9 +++++++--
 net/l2tp/l2tp_core.h    |  3 ++-
 net/l2tp/l2tp_netlink.c | 39 ++++++++++++++++++++++++++-------------
 3 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 46b450a1bc21..e927422d8c58 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -352,7 +352,8 @@ EXPORT_SYMBOL_GPL(l2tp_session_find_nth);
 /* Lookup a session by interface name.
  * This is very inefficient but is only used by management interfaces.
  */
-struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
+struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname,
+						bool do_ref)
 {
 	struct l2tp_net *pn = l2tp_pernet(net);
 	int hash;
@@ -362,7 +363,11 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
 	for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) {
 		hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) {
 			if (!strcmp(session->ifname, ifname)) {
+				l2tp_session_inc_refcount(session);
+				if (do_ref && session->ref)
+					session->ref(session);
 				rcu_read_unlock_bh();
+
 				return session;
 			}
 		}
@@ -372,7 +377,7 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
 
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname);
+EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname);
 
 static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel,
 				      struct l2tp_session *session)
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 4544e81a3d27..3b9b704a84e4 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -237,7 +237,8 @@ struct l2tp_session *l2tp_session_find(struct net *net,
 				       struct l2tp_tunnel *tunnel,
 				       u32 session_id);
 struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
-struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname);
+struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname,
+						bool do_ref);
 struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
 struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
 
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index f1b68effb077..93e317377c66 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -48,7 +48,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq,
 /* Accessed under genl lock */
 static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX];
 
-static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info)
+static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info,
+						bool do_ref)
 {
 	u32 tunnel_id;
 	u32 session_id;
@@ -59,14 +60,15 @@ static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info)
 
 	if (info->attrs[L2TP_ATTR_IFNAME]) {
 		ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
-		session = l2tp_session_find_by_ifname(net, ifname);
+		session = l2tp_session_get_by_ifname(net, ifname, do_ref);
 	} else if ((info->attrs[L2TP_ATTR_SESSION_ID]) &&
 		   (info->attrs[L2TP_ATTR_CONN_ID])) {
 		tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
 		session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
 		tunnel = l2tp_tunnel_find(net, tunnel_id);
 		if (tunnel)
-			session = l2tp_session_find(net, tunnel, session_id);
+			session = l2tp_session_get(net, tunnel, session_id,
+						   do_ref);
 	}
 
 	return session;
@@ -660,7 +662,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
 	struct l2tp_session *session;
 	u16 pw_type;
 
-	session = l2tp_nl_session_find(info);
+	session = l2tp_nl_session_get(info, true);
 	if (session == NULL) {
 		ret = -ENODEV;
 		goto out;
@@ -674,6 +676,10 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
 		if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
 			ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session);
 
+	if (session->deref)
+		session->deref(session);
+	l2tp_session_dec_refcount(session);
+
 out:
 	return ret;
 }
@@ -683,7 +689,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
 	int ret = 0;
 	struct l2tp_session *session;
 
-	session = l2tp_nl_session_find(info);
+	session = l2tp_nl_session_get(info, false);
 	if (session == NULL) {
 		ret = -ENODEV;
 		goto out;
@@ -718,6 +724,8 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
 	ret = l2tp_session_notify(&l2tp_nl_family, info,
 				  session, L2TP_CMD_SESSION_MODIFY);
 
+	l2tp_session_dec_refcount(session);
+
 out:
 	return ret;
 }
@@ -813,29 +821,34 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
 	struct sk_buff *msg;
 	int ret;
 
-	session = l2tp_nl_session_find(info);
+	session = l2tp_nl_session_get(info, false);
 	if (session == NULL) {
 		ret = -ENODEV;
-		goto out;
+		goto err;
 	}
 
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg) {
 		ret = -ENOMEM;
-		goto out;
+		goto err_ref;
 	}
 
 	ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq,
 				   0, session, L2TP_CMD_SESSION_GET);
 	if (ret < 0)
-		goto err_out;
+		goto err_ref_msg;
 
-	return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid);
+	ret = genlmsg_unicast(genl_info_net(info), msg, info->snd_portid);
 
-err_out:
+	l2tp_session_dec_refcount(session);
+
+	return ret;
+
+err_ref_msg:
 	nlmsg_free(msg);
-
-out:
+err_ref:
+	l2tp_session_dec_refcount(session);
+err:
 	return ret;
 }
 

From bf17aa36c0f199f5b254262e77eaefda7da0f50b Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Wed, 1 Mar 2017 18:22:01 -0800
Subject: [PATCH 164/410] nvme: Correct NVMF enum values to match NVMe-oF rev
 1.0

The enum values for QPTYPE, PRTYPE and CMS are off by 1 from the
values defined in figure 42 of the NVM Express over Fabrics 1.0:

    http://www.nvmexpress.org/wp-content/uploads/NVMe_over_Fabrics_1_0_Gold_20160605-1.pdf

Fix our enums to match the final spec.

Signed-off-by: Roland Dreier <roland@purestorage.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 include/linux/nvme.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index c43d435d4225..9061780b141f 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -64,26 +64,26 @@ enum {
  * RDMA_QPTYPE field
  */
 enum {
-	NVMF_RDMA_QPTYPE_CONNECTED	= 0, /* Reliable Connected */
-	NVMF_RDMA_QPTYPE_DATAGRAM	= 1, /* Reliable Datagram */
+	NVMF_RDMA_QPTYPE_CONNECTED	= 1, /* Reliable Connected */
+	NVMF_RDMA_QPTYPE_DATAGRAM	= 2, /* Reliable Datagram */
 };
 
 /* RDMA QP Service Type codes for Discovery Log Page entry TSAS
  * RDMA_QPTYPE field
  */
 enum {
-	NVMF_RDMA_PRTYPE_NOT_SPECIFIED	= 0, /* No Provider Specified */
-	NVMF_RDMA_PRTYPE_IB		= 1, /* InfiniBand */
-	NVMF_RDMA_PRTYPE_ROCE		= 2, /* InfiniBand RoCE */
-	NVMF_RDMA_PRTYPE_ROCEV2		= 3, /* InfiniBand RoCEV2 */
-	NVMF_RDMA_PRTYPE_IWARP		= 4, /* IWARP */
+	NVMF_RDMA_PRTYPE_NOT_SPECIFIED	= 1, /* No Provider Specified */
+	NVMF_RDMA_PRTYPE_IB		= 2, /* InfiniBand */
+	NVMF_RDMA_PRTYPE_ROCE		= 3, /* InfiniBand RoCE */
+	NVMF_RDMA_PRTYPE_ROCEV2		= 4, /* InfiniBand RoCEV2 */
+	NVMF_RDMA_PRTYPE_IWARP		= 5, /* IWARP */
 };
 
 /* RDMA Connection Management Service Type codes for Discovery Log Page
  * entry TSAS RDMA_CMS field
  */
 enum {
-	NVMF_RDMA_CMS_RDMA_CM	= 0, /* Sockets based enpoint addressing */
+	NVMF_RDMA_CMS_RDMA_CM	= 1, /* Sockets based endpoint addressing */
 };
 
 #define NVMF_AQ_DEPTH		32

From f1dd03a84dbf3e5ca91295a3d04c882b8bd86251 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 31 Mar 2017 17:00:05 +0200
Subject: [PATCH 165/410] nvme: add missing byte swap in nvme_setup_discard

Fixes: b35ba01e ("nvme: support ranged discard requests")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 9b3b57fef446..9583a5f58a1d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -270,7 +270,7 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 	memset(cmnd, 0, sizeof(*cmnd));
 	cmnd->dsm.opcode = nvme_cmd_dsm;
 	cmnd->dsm.nsid = cpu_to_le32(ns->ns_id);
-	cmnd->dsm.nr = segments - 1;
+	cmnd->dsm.nr = cpu_to_le32(segments - 1);
 	cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
 
 	req->special_vec.bv_page = virt_to_page(range);

From 5ac5fcc6c7a2339a34c876a9b6926a7f17225493 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 31 Mar 2017 17:00:06 +0200
Subject: [PATCH 166/410] nvmet: add missing byte swap in nvmet_get_smart_log

In this case entirely harmless as it's all-ones, but still nice to
shut up sparse.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/admin-cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index a7bcff45f437..76450b0c55f1 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -100,7 +100,7 @@ static u16 nvmet_get_smart_log(struct nvmet_req *req,
 	u16 status;
 
 	WARN_ON(req == NULL || slog == NULL);
-	if (req->cmd->get_log_page.nsid == 0xFFFFFFFF)
+	if (req->cmd->get_log_page.nsid == cpu_to_le32(0xFFFFFFFF))
 		status = nvmet_get_smart_log_all(req, slog);
 	else
 		status = nvmet_get_smart_log_nsid(req, slog);

From 78ce3daa7d703028c00eff2e03ad22efd116e549 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 31 Mar 2017 17:00:07 +0200
Subject: [PATCH 167/410] nvmet: fix byte swap in nvmet_execute_write_zeroes

The length field in the Write Zeroes command is a 16-bit field.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/io-cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index 4195115c7e54..e37acd77b5fe 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -180,7 +180,7 @@ static void nvmet_execute_write_zeroes(struct nvmet_req *req)
 
 	sector = le64_to_cpu(write_zeroes->slba) <<
 		(req->ns->blksize_shift - 9);
-	nr_sector = (((sector_t)le32_to_cpu(write_zeroes->length)) <<
+	nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length)) <<
 		(req->ns->blksize_shift - 9)) + 1;
 
 	if (__blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,

From 793c7ed9d785411a5cd6fe7e998cd7ee2870b38b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 31 Mar 2017 17:00:08 +0200
Subject: [PATCH 168/410] nvmet: fix byte swap in nvmet_parse_io_cmd

We need to do arithmetics after byte swapping, not before.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/io-cmd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/io-cmd.c b/drivers/nvme/target/io-cmd.c
index e37acd77b5fe..6b0baa9caab9 100644
--- a/drivers/nvme/target/io-cmd.c
+++ b/drivers/nvme/target/io-cmd.c
@@ -230,7 +230,7 @@ int nvmet_parse_io_cmd(struct nvmet_req *req)
 		return 0;
 	case nvme_cmd_dsm:
 		req->execute = nvmet_execute_dsm;
-		req->data_len = le32_to_cpu(cmd->dsm.nr + 1) *
+		req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
 			sizeof(struct nvme_dsm_range);
 		return 0;
 	case nvme_cmd_write_zeroes:

From 51f528a1636f352ad776a912ac86026ac7a89a2a Mon Sep 17 00:00:00 2001
From: Shrirang Bagul <shrirang.bagul@canonical.com>
Date: Thu, 30 Mar 2017 23:47:21 +0800
Subject: [PATCH 169/410] iio: st_pressure: initialize lps22hb bootime

This patch initializes the bootime in struct st_sensor_settings for
lps22hb sensor. Without this, sensor channels read from sysfs always
report stale values.

Signed-off-by: Shrirang Bagul <shrirang.bagul@canonical.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/pressure/st_pressure_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c
index 5f2680855552..fd0edca0e656 100644
--- a/drivers/iio/pressure/st_pressure_core.c
+++ b/drivers/iio/pressure/st_pressure_core.c
@@ -457,6 +457,7 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
 			.addr_stat_drdy = ST_SENSORS_DEFAULT_STAT_ADDR,
 		},
 		.multi_read_bit = true,
+		.bootime = 2,
 	},
 };
 

From 7fd6592d1287046f61bfd3cda3c03cd35be490f7 Mon Sep 17 00:00:00 2001
From: Nikolaus Schulz <nikolaus.schulz@avionic-design.de>
Date: Fri, 24 Mar 2017 13:41:51 +0100
Subject: [PATCH 170/410] iio: core: Fix IIO_VAL_FRACTIONAL_LOG2 for negative
 values

Fix formatting of negative values of type IIO_VAL_FRACTIONAL_LOG2 by
switching from do_div(), which can't handle negative numbers, to
div_s64_rem().  Also use shift_right for shifting, which is safe with
negative values.

Signed-off-by: Nikolaus Schulz <nikolaus.schulz@avionic-design.de>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
Cc: stable@vger.kernel.org
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/industrialio-core.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index d18ded45bedd..3ff91e02fee3 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -610,10 +610,9 @@ static ssize_t __iio_format_value(char *buf, size_t len, unsigned int type,
 		tmp0 = (int)div_s64_rem(tmp, 1000000000, &tmp1);
 		return snprintf(buf, len, "%d.%09u", tmp0, abs(tmp1));
 	case IIO_VAL_FRACTIONAL_LOG2:
-		tmp = (s64)vals[0] * 1000000000LL >> vals[1];
-		tmp1 = do_div(tmp, 1000000000LL);
-		tmp0 = tmp;
-		return snprintf(buf, len, "%d.%09u", tmp0, tmp1);
+		tmp = shift_right((s64)vals[0] * 1000000000LL, vals[1]);
+		tmp0 = (int)div_s64_rem(tmp, 1000000000LL, &tmp1);
+		return snprintf(buf, len, "%d.%09u", tmp0, abs(tmp1));
 	case IIO_VAL_INT_MULTIPLE:
 	{
 		int i;

From 862d1d89ad9e5b117f1fb2a472cef6fc92c0007a Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Wed, 29 Mar 2017 16:23:35 -0700
Subject: [PATCH 171/410] iio: accel: hid-sensor-accel-3d: Fix duplicate scan
 index error

When both accel_3d and gravity sensor are present, iio_device_register()
fails with "Duplicate scan index" error.
The reason for this is setting of indio_dev->num_channels based on
accel_3d channel for both gravity and accel-3d sensor. But number of
channels are not same, so for gravity it is pointing to some invalid
memory and getting scan_index to compare which may match.
To fix this issue, set the indio_dev->num_channels correctly based on
the sensor type.

Fixes: 0e377f3b9ae9 ('iio: Add gravity sensor support')
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/accel/hid-sensor-accel-3d.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c
index ca5759c0c318..43a6cb078193 100644
--- a/drivers/iio/accel/hid-sensor-accel-3d.c
+++ b/drivers/iio/accel/hid-sensor-accel-3d.c
@@ -370,10 +370,12 @@ static int hid_accel_3d_probe(struct platform_device *pdev)
 		name = "accel_3d";
 		channel_spec = accel_3d_channels;
 		channel_size = sizeof(accel_3d_channels);
+		indio_dev->num_channels = ARRAY_SIZE(accel_3d_channels);
 	} else {
 		name = "gravity";
 		channel_spec = gravity_channels;
 		channel_size = sizeof(gravity_channels);
+		indio_dev->num_channels = ARRAY_SIZE(gravity_channels);
 	}
 	ret = hid_sensor_parse_common_attributes(hsdev, hsdev->usage,
 					&accel_state->common_attributes);
@@ -395,7 +397,6 @@ static int hid_accel_3d_probe(struct platform_device *pdev)
 		goto error_free_dev_mem;
 	}
 
-	indio_dev->num_channels = ARRAY_SIZE(accel_3d_channels);
 	indio_dev->dev.parent = &pdev->dev;
 	indio_dev->info = &accel_3d_info;
 	indio_dev->name = name;

From bba6d9e47f3ea894e501f94b086a59ffe28241ac Mon Sep 17 00:00:00 2001
From: Song Hongyan <hongyan.song@intel.com>
Date: Tue, 28 Mar 2017 22:23:55 +0800
Subject: [PATCH 172/410] iio: hid-sensor-attributes: Fix sensor property
 setting failure.

When system bootup without get sensor property, set sensor
property will be fail.

If no get_feature operation done before set_feature, the sensor
properties will all be the initialized value, which is not the
same with sensor real properties. When set sensor property it will
write back to sensor the changed perperty data combines with other
sensor properties data, it is not right and may be dangerous.

In order to get all sensor properties, choose to read one of the sensor
properties(no matter read any sensor peroperty, driver will get all
the peroperties and return the requested one).

Fixes: 73c6768b710a ("iio: hid-sensors: Common attribute and trigger")
Signed-off-by: Song Hongyan <hongyan.song@intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/common/hid-sensors/hid-sensor-attributes.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
index 7afdac42ed42..01e02b9926d4 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
@@ -379,6 +379,8 @@ int hid_sensor_parse_common_attributes(struct hid_sensor_hub_device *hsdev,
 {
 
 	struct hid_sensor_hub_attribute_info timestamp;
+	s32 value;
+	int ret;
 
 	hid_sensor_get_reporting_interval(hsdev, usage_id, st);
 
@@ -417,6 +419,14 @@ int hid_sensor_parse_common_attributes(struct hid_sensor_hub_device *hsdev,
 		st->sensitivity.index, st->sensitivity.report_id,
 		timestamp.index, timestamp.report_id);
 
+	ret = sensor_hub_get_feature(hsdev,
+				st->power_state.report_id,
+				st->power_state.index, sizeof(value), &value);
+	if (ret < 0)
+		return ret;
+	if (value < 0)
+		return -EINVAL;
+
 	return 0;
 }
 EXPORT_SYMBOL(hid_sensor_parse_common_attributes);

From 27c0e3748e41ca79171ffa3e97415a20af6facd0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 17 Feb 2017 18:42:24 -0500
Subject: [PATCH 173/410] [iov_iter] new privimitive: iov_iter_revert()

opposite to iov_iter_advance(); the caller is responsible for never
using it to move back past the initial position.

Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/uio.h |  6 ++++-
 lib/iov_iter.c      | 63 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 804e34c6f981..f2d36a3d3005 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -39,7 +39,10 @@ struct iov_iter {
 	};
 	union {
 		unsigned long nr_segs;
-		int idx;
+		struct {
+			int idx;
+			int start_idx;
+		};
 	};
 };
 
@@ -81,6 +84,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to);
 size_t iov_iter_copy_from_user_atomic(struct page *page,
 		struct iov_iter *i, unsigned long offset, size_t bytes);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
+void iov_iter_revert(struct iov_iter *i, size_t bytes);
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index e68604ae3ced..60abc44385b7 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -786,6 +786,68 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
 }
 EXPORT_SYMBOL(iov_iter_advance);
 
+void iov_iter_revert(struct iov_iter *i, size_t unroll)
+{
+	if (!unroll)
+		return;
+	i->count += unroll;
+	if (unlikely(i->type & ITER_PIPE)) {
+		struct pipe_inode_info *pipe = i->pipe;
+		int idx = i->idx;
+		size_t off = i->iov_offset;
+		while (1) {
+			size_t n = off - pipe->bufs[idx].offset;
+			if (unroll < n) {
+				off -= (n - unroll);
+				break;
+			}
+			unroll -= n;
+			if (!unroll && idx == i->start_idx) {
+				off = 0;
+				break;
+			}
+			if (!idx--)
+				idx = pipe->buffers - 1;
+			off = pipe->bufs[idx].offset + pipe->bufs[idx].len;
+		}
+		i->iov_offset = off;
+		i->idx = idx;
+		pipe_truncate(i);
+		return;
+	}
+	if (unroll <= i->iov_offset) {
+		i->iov_offset -= unroll;
+		return;
+	}
+	unroll -= i->iov_offset;
+	if (i->type & ITER_BVEC) {
+		const struct bio_vec *bvec = i->bvec;
+		while (1) {
+			size_t n = (--bvec)->bv_len;
+			i->nr_segs++;
+			if (unroll <= n) {
+				i->bvec = bvec;
+				i->iov_offset = n - unroll;
+				return;
+			}
+			unroll -= n;
+		}
+	} else { /* same logics for iovec and kvec */
+		const struct iovec *iov = i->iov;
+		while (1) {
+			size_t n = (--iov)->iov_len;
+			i->nr_segs++;
+			if (unroll <= n) {
+				i->iov = iov;
+				i->iov_offset = n - unroll;
+				return;
+			}
+			unroll -= n;
+		}
+	}
+}
+EXPORT_SYMBOL(iov_iter_revert);
+
 /*
  * Return the count of just the current iov_iter segment.
  */
@@ -839,6 +901,7 @@ void iov_iter_pipe(struct iov_iter *i, int direction,
 	i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
 	i->iov_offset = 0;
 	i->count = count;
+	i->start_idx = i->idx;
 }
 EXPORT_SYMBOL(iov_iter_pipe);
 

From 3278682123811dd8ef07de5eb701fc4548fcebf2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 17 Feb 2017 20:16:34 -0500
Subject: [PATCH 174/410] make skb_copy_datagram_msg() et.al. preserve
 ->msg_iter on error

Fixes the mess observed in e.g. rsync over a noisy link we'd been
seeing since last Summer.  What happens is that we copy part of
a datagram before noticing a checksum mismatch.  Datagram will be
resent, all right, but we want the next try go into the same place,
not after it...

All this family of primitives (copy/checksum and copy a datagram
into destination) is "all or nothing" sort of interface - either
we get 0 (meaning that copy had been successful) or we get an
error (and no way to tell how much had been copied before we ran
into whatever error it had been).  Make all of them leave iterator
unadvanced in case of errors - all callers must be able to cope
with that (an error might've been caught before the iterator had
been advanced), it costs very little to arrange, it's safer for
callers and actually fixes at least one bug in said callers.

Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/core/datagram.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/net/core/datagram.c b/net/core/datagram.c
index ea633342ab0d..f4947e737f34 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -398,7 +398,7 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
 			   struct iov_iter *to, int len)
 {
 	int start = skb_headlen(skb);
-	int i, copy = start - offset;
+	int i, copy = start - offset, start_off = offset, n;
 	struct sk_buff *frag_iter;
 
 	trace_skb_copy_datagram_iovec(skb, len);
@@ -407,11 +407,12 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
 	if (copy > 0) {
 		if (copy > len)
 			copy = len;
-		if (copy_to_iter(skb->data + offset, copy, to) != copy)
+		n = copy_to_iter(skb->data + offset, copy, to);
+		offset += n;
+		if (n != copy)
 			goto short_copy;
 		if ((len -= copy) == 0)
 			return 0;
-		offset += copy;
 	}
 
 	/* Copy paged appendix. Hmm... why does this look so complicated? */
@@ -425,13 +426,14 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
 		if ((copy = end - offset) > 0) {
 			if (copy > len)
 				copy = len;
-			if (copy_page_to_iter(skb_frag_page(frag),
+			n = copy_page_to_iter(skb_frag_page(frag),
 					      frag->page_offset + offset -
-					      start, copy, to) != copy)
+					      start, copy, to);
+			offset += n;
+			if (n != copy)
 				goto short_copy;
 			if (!(len -= copy))
 				return 0;
-			offset += copy;
 		}
 		start = end;
 	}
@@ -463,6 +465,7 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
 	 */
 
 fault:
+	iov_iter_revert(to, offset - start_off);
 	return -EFAULT;
 
 short_copy:
@@ -613,7 +616,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 				      __wsum *csump)
 {
 	int start = skb_headlen(skb);
-	int i, copy = start - offset;
+	int i, copy = start - offset, start_off = offset;
 	struct sk_buff *frag_iter;
 	int pos = 0;
 	int n;
@@ -623,11 +626,11 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 		if (copy > len)
 			copy = len;
 		n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to);
+		offset += n;
 		if (n != copy)
 			goto fault;
 		if ((len -= copy) == 0)
 			return 0;
-		offset += copy;
 		pos = copy;
 	}
 
@@ -649,12 +652,12 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 						  offset - start, copy,
 						  &csum2, to);
 			kunmap(page);
+			offset += n;
 			if (n != copy)
 				goto fault;
 			*csump = csum_block_add(*csump, csum2, pos);
 			if (!(len -= copy))
 				return 0;
-			offset += copy;
 			pos += copy;
 		}
 		start = end;
@@ -687,6 +690,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 		return 0;
 
 fault:
+	iov_iter_revert(to, offset - start_off);
 	return -EFAULT;
 }
 
@@ -771,6 +775,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
 	}
 	return 0;
 csum_error:
+	iov_iter_revert(&msg->msg_iter, chunk);
 	return -EINVAL;
 fault:
 	return -EFAULT;

From 1c99de981f30b3e7868b8d20ce5479fa1c0fea46 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Sun, 2 Apr 2017 13:36:44 -0700
Subject: [PATCH 175/410] iscsi-target: Drop work-around for legacy GlobalSAN
 initiator

Once upon a time back in 2009, a work-around was added to support
the GlobalSAN iSCSI initiator v3.3 for MacOSX, which during login
did not propose nor respond to MaxBurstLength, FirstBurstLength,
DefaultTime2Wait and DefaultTime2Retain keys.

The work-around in iscsi_check_proposer_for_optional_reply()
allowed the missing keys to be proposed, but did not require
waiting for a response before moving to full feature phase
operation.  This allowed GlobalSAN v3.3 to work out-of-the
box, and for many years we didn't run into login interopt
issues with any other initiators..

Until recently, when Martin tried a QLogic 57840S iSCSI Offload
HBA on Windows 2016 which completed login, but subsequently
failed with:

    Got unknown iSCSI OpCode: 0x43

The issue was QLogic MSFT side did not propose DefaultTime2Wait +
DefaultTime2Retain, so LIO proposes them itself, and immediately
transitions to full feature phase because of the GlobalSAN hack.
However, the QLogic MSFT side still attempts to respond to
DefaultTime2Retain + DefaultTime2Wait, even though LIO has set
ISCSI_FLAG_LOGIN_NEXT_STAGE3 + ISCSI_FLAG_LOGIN_TRANSIT
in last login response.

So while the QLogic MSFT side should have been proposing these
two keys to start, it was doing the correct thing per RFC-3720
attempting to respond to proposed keys before transitioning to
full feature phase.

All that said, recent versions of GlobalSAN iSCSI (v5.3.0.541)
does correctly propose the four keys during login, making the
original work-around moot.

So in order to allow QLogic MSFT to run unmodified as-is, go
ahead and drop this long standing work-around.

Reported-by: Martin Svec <martin.svec@zoner.cz>
Cc: Martin Svec <martin.svec@zoner.cz>
Cc: Himanshu Madhani <Himanshu.Madhani@cavium.com>
Cc: Arun Easi <arun.easi@cavium.com>
Cc: stable@vger.kernel.org # 3.1+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target_parameters.c | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c
index e65bf78ceef3..fce627628200 100644
--- a/drivers/target/iscsi/iscsi_target_parameters.c
+++ b/drivers/target/iscsi/iscsi_target_parameters.c
@@ -781,22 +781,6 @@ static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param)
 	} else if (IS_TYPE_NUMBER(param)) {
 		if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH))
 			SET_PSTATE_REPLY_OPTIONAL(param);
-		/*
-		 * The GlobalSAN iSCSI Initiator for MacOSX does
-		 * not respond to MaxBurstLength, FirstBurstLength,
-		 * DefaultTime2Wait or DefaultTime2Retain parameter keys.
-		 * So, we set them to 'reply optional' here, and assume the
-		 * the defaults from iscsi_parameters.h if the initiator
-		 * is not RFC compliant and the keys are not negotiated.
-		 */
-		if (!strcmp(param->name, MAXBURSTLENGTH))
-			SET_PSTATE_REPLY_OPTIONAL(param);
-		if (!strcmp(param->name, FIRSTBURSTLENGTH))
-			SET_PSTATE_REPLY_OPTIONAL(param);
-		if (!strcmp(param->name, DEFAULTTIME2WAIT))
-			SET_PSTATE_REPLY_OPTIONAL(param);
-		if (!strcmp(param->name, DEFAULTTIME2RETAIN))
-			SET_PSTATE_REPLY_OPTIONAL(param);
 		/*
 		 * Required for gPXE iSCSI boot client
 		 */

From a5d68ba85801a78c892a0eb8efb711e293ed314b Mon Sep 17 00:00:00 2001
From: Xiubo Li <lixiubo@cmss.chinamobile.com>
Date: Fri, 31 Mar 2017 10:35:25 +0800
Subject: [PATCH 176/410] tcmu: Skip Data-Out blocks before gathering Data-In
 buffer for BIDI case

For the bidirectional case, the Data-Out buffer blocks will always at
the head of the tcmu_cmd's bitmap, and before gathering the Data-In
buffer, first of all it should skip the Data-Out ones, or the device
supporting BIDI commands won't work.

Fixed: 26418649eead ("target/user: Introduce data_bitmap, replace
		data_length/data_head/data_tail")
Reported-by: Ilias Tsitsimpis <iliastsi@arrikto.com>
Tested-by: Ilias Tsitsimpis <iliastsi@arrikto.com>
Signed-off-by: Xiubo Li <lixiubo@cmss.chinamobile.com>
Cc: stable@vger.kernel.org # 4.6+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/target_core_user.c | 48 +++++++++++++++++++++----------
 1 file changed, 33 insertions(+), 15 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 9885d1b521fe..f615c3bbb73e 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -311,24 +311,50 @@ static void free_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd)
 		   DATA_BLOCK_BITS);
 }
 
-static void gather_data_area(struct tcmu_dev *udev, unsigned long *cmd_bitmap,
-		struct scatterlist *data_sg, unsigned int data_nents)
+static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd,
+			     bool bidi)
 {
+	struct se_cmd *se_cmd = cmd->se_cmd;
 	int i, block;
 	int block_remaining = 0;
 	void *from, *to;
 	size_t copy_bytes, from_offset;
-	struct scatterlist *sg;
+	struct scatterlist *sg, *data_sg;
+	unsigned int data_nents;
+	DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS);
+
+	bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS);
+
+	if (!bidi) {
+		data_sg = se_cmd->t_data_sg;
+		data_nents = se_cmd->t_data_nents;
+	} else {
+		uint32_t count;
+
+		/*
+		 * For bidi case, the first count blocks are for Data-Out
+		 * buffer blocks, and before gathering the Data-In buffer
+		 * the Data-Out buffer blocks should be discarded.
+		 */
+		count = DIV_ROUND_UP(se_cmd->data_length, DATA_BLOCK_SIZE);
+		while (count--) {
+			block = find_first_bit(bitmap, DATA_BLOCK_BITS);
+			clear_bit(block, bitmap);
+		}
+
+		data_sg = se_cmd->t_bidi_data_sg;
+		data_nents = se_cmd->t_bidi_data_nents;
+	}
 
 	for_each_sg(data_sg, sg, data_nents, i) {
 		int sg_remaining = sg->length;
 		to = kmap_atomic(sg_page(sg)) + sg->offset;
 		while (sg_remaining > 0) {
 			if (block_remaining == 0) {
-				block = find_first_bit(cmd_bitmap,
+				block = find_first_bit(bitmap,
 						DATA_BLOCK_BITS);
 				block_remaining = DATA_BLOCK_SIZE;
-				clear_bit(block, cmd_bitmap);
+				clear_bit(block, bitmap);
 			}
 			copy_bytes = min_t(size_t, sg_remaining,
 					block_remaining);
@@ -610,19 +636,11 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *
 			       se_cmd->scsi_sense_length);
 		free_data_area(udev, cmd);
 	} else if (se_cmd->se_cmd_flags & SCF_BIDI) {
-		DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS);
-
 		/* Get Data-In buffer before clean up */
-		bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS);
-		gather_data_area(udev, bitmap,
-			se_cmd->t_bidi_data_sg, se_cmd->t_bidi_data_nents);
+		gather_data_area(udev, cmd, true);
 		free_data_area(udev, cmd);
 	} else if (se_cmd->data_direction == DMA_FROM_DEVICE) {
-		DECLARE_BITMAP(bitmap, DATA_BLOCK_BITS);
-
-		bitmap_copy(bitmap, cmd->data_bitmap, DATA_BLOCK_BITS);
-		gather_data_area(udev, bitmap,
-			se_cmd->t_data_sg, se_cmd->t_data_nents);
+		gather_data_area(udev, cmd, false);
 		free_data_area(udev, cmd);
 	} else if (se_cmd->data_direction == DMA_TO_DEVICE) {
 		free_data_area(udev, cmd);

From 75514b6654859e0130b512396dc964d2a9e84967 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Fri, 31 Mar 2017 18:41:23 -0500
Subject: [PATCH 177/410] net: ethernet: ti: cpsw: wake tx queues on
 ndo_tx_timeout

In case, if TX watchdog is fired some or all netdev TX queues will be
stopped and as part of recovery it is required not only to drain and
reinitailize CPSW TX channeles, but also wake up stoppted TX queues what
doesn't happen now and netdevice will stop transmiting data until
reopenned.

Hence, add netif_tx_wake_all_queues() call in .ndo_tx_timeout() to complete
recovery and restore TX path.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpsw.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 9f3d9c67e3fe..58cdc066ef2c 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1817,6 +1817,8 @@ static void cpsw_ndo_tx_timeout(struct net_device *ndev)
 	}
 
 	cpsw_intr_enable(cpsw);
+	netif_trans_update(ndev);
+	netif_tx_wake_all_queues(ndev);
 }
 
 static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p)

From 921d701e6f31e1ffaca3560416af1aa04edb4c4f Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Sun, 2 Apr 2017 20:08:04 -0700
Subject: [PATCH 178/410] nios2: reserve boot memory for device tree

Make sure to reserve the boot memory for the flattened device tree.
Otherwise it might get overwritten, e.g. when initial_boot_params is
copied, leading to a corrupted FDT and a boot hang/crash:

  bootconsole [early0] enabled
  Early console on uart16650 initialized at 0xf8001600
  OF: fdt: Error -11 processing FDT
  Kernel panic - not syncing: setup_cpuinfo: No CPU found in devicetree!

  ---[ end Kernel panic - not syncing: setup_cpuinfo: No CPU found in devicetree!

Guenter Roeck says:

> I think I found the problem. In unflatten_and_copy_device_tree(), with added
> debug information:
>
> OF: fdt: initial_boot_params=c861e400, dt=c861f000 size=28874 (0x70ca)
>
> ... and then initial_boot_params is copied to dt, which results in corrupted
> fdt since the memory overlaps. Looks like the initial_boot_params memory
> is not reserved and (re-)allocated by early_init_dt_alloc_memory_arch().

Cc: stable@vger.kernel.org
Reported-by: Guenter Roeck <linux@roeck-us.net>
Reference: http://lkml.kernel.org/r/20170226210338.GA19476@roeck-us.net
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Acked-by: Ley Foon Tan <ley.foon.tan@intel.com>
---
 arch/nios2/kernel/prom.c  | 7 +++++++
 arch/nios2/kernel/setup.c | 3 +++
 2 files changed, 10 insertions(+)

diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c
index 367c5426157b..3901b80d4420 100644
--- a/arch/nios2/kernel/prom.c
+++ b/arch/nios2/kernel/prom.c
@@ -48,6 +48,13 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
 	return alloc_bootmem_align(size, align);
 }
 
+int __init early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size,
+					     bool nomap)
+{
+	reserve_bootmem(base, size, BOOTMEM_DEFAULT);
+	return 0;
+}
+
 void __init early_init_devtree(void *params)
 {
 	__be32 *dtb = (u32 *)__dtb_start;
diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c
index 6e57ffa5db27..6044d9be28b4 100644
--- a/arch/nios2/kernel/setup.c
+++ b/arch/nios2/kernel/setup.c
@@ -201,6 +201,9 @@ void __init setup_arch(char **cmdline_p)
 	}
 #endif /* CONFIG_BLK_DEV_INITRD */
 
+	early_init_fdt_reserve_self();
+	early_init_fdt_scan_reserved_mem();
+
 	unflatten_and_copy_device_tree();
 
 	setup_cpuinfo();

From 75dd7e4bb663c7047c7d1bd4dad26f8c048851be Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 31 Mar 2017 18:31:25 +0100
Subject: [PATCH 179/410] Documentation/filesystems: fix documentation for
 ->getattr()

Following the recent merge of statx, correct the documented prototype
for the ->getattr() inode operation, and add an entry to the porting
file.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking | 3 +--
 Documentation/filesystems/porting | 6 ++++++
 Documentation/filesystems/vfs.txt | 3 +--
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index fdcfdd79682a..fe25787ff6d4 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -58,8 +58,7 @@ prototypes:
 	int (*permission) (struct inode *, int, unsigned int);
 	int (*get_acl)(struct inode *, int);
 	int (*setattr) (struct dentry *, struct iattr *);
-	int (*getattr) (const struct path *, struct dentry *, struct kstat *,
-			u32, unsigned int);
+	int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
 	void (*update_time)(struct inode *, struct timespec *, int);
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 95280079c0b3..5fb17f49f7a2 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -600,3 +600,9 @@ in your dentry operations instead.
 [recommended]
 	->readlink is optional for symlinks.  Don't set, unless filesystem needs
 	to fake something for readlink(2).
+--
+[mandatory]
+	->getattr() is now passed a struct path rather than a vfsmount and
+	dentry separately, and it now has request_mask and query_flags arguments
+	to specify the fields and sync type requested by statx.  Filesystems not
+	supporting any statx-specific features may ignore the new arguments.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 569211703721..94dd27ef4a76 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -382,8 +382,7 @@ struct inode_operations {
 	int (*permission) (struct inode *, int);
 	int (*get_acl)(struct inode *, int);
 	int (*setattr) (struct dentry *, struct iattr *);
-	int (*getattr) (const struct path *, struct dentry *, struct kstat *,
-			u32, unsigned int);
+	int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	void (*update_time)(struct inode *, struct timespec *, int);
 	int (*atomic_open)(struct inode *, struct dentry *, struct file *,

From 8c7493aa3e9ae90f90196f4d4c1398ad143cba7b Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 31 Mar 2017 18:31:32 +0100
Subject: [PATCH 180/410] statx: reject unknown flags when using NULL path

The statx() system call currently accepts unknown flags when called with
a NULL path to operate on a file descriptor.  Left unchanged, this could
make it hard to introduce new query flags in the future, since
applications may not be able to tell whether a given flag is supported.

Fix this by failing the system call with EINVAL if any flags other than
KSTAT_QUERY_FLAGS are specified in combination with a NULL path.

Arguably, we could still permit known lookup-related flags such as
AT_SYMLINK_NOFOLLOW.  However, that would be inconsistent with how
sys_utimensat() behaves when passed a NULL path, which seems to be the
closest precedent.  And given that the NULL path case is (I believe)
mainly intended to be used to implement a wrapper function like fstatx()
that doesn't have a path argument, I think rejecting lookup-related
flags too is probably the best choice.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/stat.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/stat.c b/fs/stat.c
index fa0be59340cc..df484a60846d 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -130,9 +130,13 @@ EXPORT_SYMBOL(vfs_getattr);
 int vfs_statx_fd(unsigned int fd, struct kstat *stat,
 		 u32 request_mask, unsigned int query_flags)
 {
-	struct fd f = fdget_raw(fd);
+	struct fd f;
 	int error = -EBADF;
 
+	if (query_flags & ~KSTAT_QUERY_FLAGS)
+		return -EINVAL;
+
+	f = fdget_raw(fd);
 	if (f.file) {
 		error = vfs_getattr(&f.file->f_path, stat,
 				    request_mask, query_flags);

From b15fb70b82299f92bb8d591c9d1731cb23fa8290 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 31 Mar 2017 18:31:40 +0100
Subject: [PATCH 181/410] statx: remove incorrect part of vfs_statx() comment

request_mask and query_flags are function arguments, not passed in
struct kstat.  So remove the part of the comment which claims otherwise.
This was apparently left over from an earlier version of the statx
patch.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/stat.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/stat.c b/fs/stat.c
index df484a60846d..b792dd201c31 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -159,9 +159,6 @@ EXPORT_SYMBOL(vfs_statx_fd);
  * Additionally, the use of AT_SYMLINK_NOFOLLOW in flags will prevent a symlink
  * at the given name from being referenced.
  *
- * The caller must have preset stat->request_mask as for vfs_getattr().  The
- * flags are also used to load up stat->query_flags.
- *
  * 0 will be returned on success, and a -ve error code if unsuccessful.
  */
 int vfs_statx(int dfd, const char __user *filename, int flags,

From 64bd72048a2ac07efed70debe606a1c6e5e03554 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 31 Mar 2017 18:31:48 +0100
Subject: [PATCH 182/410] statx: optimize copy of struct statx to userspace

I found that statx() was significantly slower than stat().  As a
microbenchmark, I compared 10,000,000 invocations of fstat() on a tmpfs
file to the same with statx() passed a NULL path:

	$ time ./stat_benchmark

	real	0m1.464s
	user	0m0.275s
	sys	0m1.187s

	$ time ./statx_benchmark

	real	0m5.530s
	user	0m0.281s
	sys	0m5.247s

statx is expected to be a little slower than stat because struct statx
is larger than struct stat, but not by *that* much.  It turns out that
most of the overhead was in copying struct statx to userspace, mostly in
all the stac/clac instructions that got generated for each __put_user()
call.  (This was on x86_64, but some other architectures, e.g. arm64,
have something similar now too.)

stat() instead initializes its struct on the stack and copies it to
userspace with a single call to copy_to_user().  This turns out to be
much faster, and changing statx to do this makes it almost as fast as
stat:

	$ time ./statx_benchmark

	real	0m1.624s
	user	0m0.270s
	sys	0m1.354s

For zeroing the reserved fields, start by zeroing the full struct with
memset.  This makes it clear that every byte copied to userspace is
initialized, even implicit padding bytes (though there are none
currently).  In the scenarios I tested, it also performed the same as a
designated initializer.  Manually initializing each field was still
slightly faster, but would have been more error-prone and less
verifiable.

Also rename statx_set_result() to cp_statx() for consistency with
cp_old_stat() et al., and make it noinline so that struct statx doesn't
add to the stack usage during the main portion of the syscall execution.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/stat.c | 68 ++++++++++++++++++++++++-------------------------------
 1 file changed, 29 insertions(+), 39 deletions(-)

diff --git a/fs/stat.c b/fs/stat.c
index b792dd201c31..ab27f2868588 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -510,46 +510,37 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename,
 }
 #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */
 
-static inline int __put_timestamp(struct timespec *kts,
-				  struct statx_timestamp __user *uts)
+static noinline_for_stack int
+cp_statx(const struct kstat *stat, struct statx __user *buffer)
 {
-	return (__put_user(kts->tv_sec,		&uts->tv_sec		) ||
-		__put_user(kts->tv_nsec,	&uts->tv_nsec		) ||
-		__put_user(0,			&uts->__reserved	));
-}
+	struct statx tmp;
 
-/*
- * Set the statx results.
- */
-static long statx_set_result(struct kstat *stat, struct statx __user *buffer)
-{
-	uid_t uid = from_kuid_munged(current_user_ns(), stat->uid);
-	gid_t gid = from_kgid_munged(current_user_ns(), stat->gid);
+	memset(&tmp, 0, sizeof(tmp));
 
-	if (__put_user(stat->result_mask,	&buffer->stx_mask	) ||
-	    __put_user(stat->mode,		&buffer->stx_mode	) ||
-	    __clear_user(&buffer->__spare0, sizeof(buffer->__spare0))	  ||
-	    __put_user(stat->nlink,		&buffer->stx_nlink	) ||
-	    __put_user(uid,			&buffer->stx_uid	) ||
-	    __put_user(gid,			&buffer->stx_gid	) ||
-	    __put_user(stat->attributes,	&buffer->stx_attributes	) ||
-	    __put_user(stat->blksize,		&buffer->stx_blksize	) ||
-	    __put_user(MAJOR(stat->rdev),	&buffer->stx_rdev_major	) ||
-	    __put_user(MINOR(stat->rdev),	&buffer->stx_rdev_minor	) ||
-	    __put_user(MAJOR(stat->dev),	&buffer->stx_dev_major	) ||
-	    __put_user(MINOR(stat->dev),	&buffer->stx_dev_minor	) ||
-	    __put_timestamp(&stat->atime,	&buffer->stx_atime	) ||
-	    __put_timestamp(&stat->btime,	&buffer->stx_btime	) ||
-	    __put_timestamp(&stat->ctime,	&buffer->stx_ctime	) ||
-	    __put_timestamp(&stat->mtime,	&buffer->stx_mtime	) ||
-	    __put_user(stat->ino,		&buffer->stx_ino	) ||
-	    __put_user(stat->size,		&buffer->stx_size	) ||
-	    __put_user(stat->blocks,		&buffer->stx_blocks	) ||
-	    __clear_user(&buffer->__spare1, sizeof(buffer->__spare1))	  ||
-	    __clear_user(&buffer->__spare2, sizeof(buffer->__spare2)))
-		return -EFAULT;
+	tmp.stx_mask = stat->result_mask;
+	tmp.stx_blksize = stat->blksize;
+	tmp.stx_attributes = stat->attributes;
+	tmp.stx_nlink = stat->nlink;
+	tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
+	tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
+	tmp.stx_mode = stat->mode;
+	tmp.stx_ino = stat->ino;
+	tmp.stx_size = stat->size;
+	tmp.stx_blocks = stat->blocks;
+	tmp.stx_atime.tv_sec = stat->atime.tv_sec;
+	tmp.stx_atime.tv_nsec = stat->atime.tv_nsec;
+	tmp.stx_btime.tv_sec = stat->btime.tv_sec;
+	tmp.stx_btime.tv_nsec = stat->btime.tv_nsec;
+	tmp.stx_ctime.tv_sec = stat->ctime.tv_sec;
+	tmp.stx_ctime.tv_nsec = stat->ctime.tv_nsec;
+	tmp.stx_mtime.tv_sec = stat->mtime.tv_sec;
+	tmp.stx_mtime.tv_nsec = stat->mtime.tv_nsec;
+	tmp.stx_rdev_major = MAJOR(stat->rdev);
+	tmp.stx_rdev_minor = MINOR(stat->rdev);
+	tmp.stx_dev_major = MAJOR(stat->dev);
+	tmp.stx_dev_minor = MINOR(stat->dev);
 
-	return 0;
+	return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0;
 }
 
 /**
@@ -573,8 +564,6 @@ SYSCALL_DEFINE5(statx,
 
 	if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
 		return -EINVAL;
-	if (!access_ok(VERIFY_WRITE, buffer, sizeof(*buffer)))
-		return -EFAULT;
 
 	if (filename)
 		error = vfs_statx(dfd, filename, flags, &stat, mask);
@@ -582,7 +571,8 @@ SYSCALL_DEFINE5(statx,
 		error = vfs_statx_fd(dfd, &stat, mask, flags);
 	if (error)
 		return error;
-	return statx_set_result(&stat, buffer);
+
+	return cp_statx(&stat, buffer);
 }
 
 /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */

From 99652ea56a4186bc5bf8a3721c5353f41b35ebcb Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 31 Mar 2017 18:31:56 +0100
Subject: [PATCH 183/410] ext4: Add statx support

Return enhanced file attributes from the Ext4 filesystem.  This includes
the following:

 (1) The inode creation time (i_crtime) as stx_btime, setting STATX_BTIME.

 (2) Certain FS_xxx_FL flags are mapped to stx_attribute flags.

This requires that all ext4 inodes have a getattr call, not just some of
them, so to this end, split the ext4_getattr() function and only call part
of it where appropriate.

Example output:

	[root@andromeda ~]# touch foo
	[root@andromeda ~]# chattr +ai foo
	[root@andromeda ~]# /tmp/test-statx foo
	statx(foo) = 0
	results=fff
	  Size: 0               Blocks: 0          IO Block: 4096    regular file
	Device: 08:12           Inode: 2101950     Links: 1
	Access: (0644/-rw-r--r--)  Uid:     0   Gid:     0
	Access: 2016-02-11 17:08:29.031795451+0000
	Modify: 2016-02-11 17:08:29.031795451+0000
	Change: 2016-02-11 17:11:11.987790114+0000
	 Birth: 2016-02-11 17:08:29.031795451+0000
	Attributes: 0000000000000030 (-------- -------- -------- -------- -------- -------- -------- --ai----)

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext4/ext4.h    |  1 +
 fs/ext4/file.c    |  2 +-
 fs/ext4/inode.c   | 35 ++++++++++++++++++++++++++++++++---
 fs/ext4/namei.c   |  2 ++
 fs/ext4/symlink.c |  3 +++
 5 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f493af666591..fb69ee2388db 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2466,6 +2466,7 @@ extern int  ext4_setattr(struct dentry *, struct iattr *);
 extern int  ext4_getattr(const struct path *, struct kstat *, u32, unsigned int);
 extern void ext4_evict_inode(struct inode *);
 extern void ext4_clear_inode(struct inode *);
+extern int  ext4_file_getattr(const struct path *, struct kstat *, u32, unsigned int);
 extern int  ext4_sync_inode(handle_t *, struct inode *);
 extern void ext4_dirty_inode(struct inode *, int);
 extern int ext4_change_inode_journal_flag(struct inode *, int);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8210c1f43556..cefa9835f275 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -744,7 +744,7 @@ const struct file_operations ext4_file_operations = {
 
 const struct inode_operations ext4_file_inode_operations = {
 	.setattr	= ext4_setattr,
-	.getattr	= ext4_getattr,
+	.getattr	= ext4_file_getattr,
 	.listxattr	= ext4_listxattr,
 	.get_acl	= ext4_get_acl,
 	.set_acl	= ext4_set_acl,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4247d8d25687..5d02b922afa3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5390,11 +5390,40 @@ err_out:
 int ext4_getattr(const struct path *path, struct kstat *stat,
 		 u32 request_mask, unsigned int query_flags)
 {
-	struct inode *inode;
-	unsigned long long delalloc_blocks;
+	struct inode *inode = d_inode(path->dentry);
+	struct ext4_inode *raw_inode;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	unsigned int flags;
+
+	if (EXT4_FITS_IN_INODE(raw_inode, ei, i_crtime)) {
+		stat->result_mask |= STATX_BTIME;
+		stat->btime.tv_sec = ei->i_crtime.tv_sec;
+		stat->btime.tv_nsec = ei->i_crtime.tv_nsec;
+	}
+
+	flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
+	if (flags & EXT4_APPEND_FL)
+		stat->attributes |= STATX_ATTR_APPEND;
+	if (flags & EXT4_COMPR_FL)
+		stat->attributes |= STATX_ATTR_COMPRESSED;
+	if (flags & EXT4_ENCRYPT_FL)
+		stat->attributes |= STATX_ATTR_ENCRYPTED;
+	if (flags & EXT4_IMMUTABLE_FL)
+		stat->attributes |= STATX_ATTR_IMMUTABLE;
+	if (flags & EXT4_NODUMP_FL)
+		stat->attributes |= STATX_ATTR_NODUMP;
 
-	inode = d_inode(path->dentry);
 	generic_fillattr(inode, stat);
+	return 0;
+}
+
+int ext4_file_getattr(const struct path *path, struct kstat *stat,
+		      u32 request_mask, unsigned int query_flags)
+{
+	struct inode *inode = d_inode(path->dentry);
+	u64 delalloc_blocks;
+
+	ext4_getattr(path, stat, request_mask, query_flags);
 
 	/*
 	 * If there is inline data in the inode, the inode will normally not
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 6ad612c576fc..07e5e1405771 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3912,6 +3912,7 @@ const struct inode_operations ext4_dir_inode_operations = {
 	.tmpfile	= ext4_tmpfile,
 	.rename		= ext4_rename2,
 	.setattr	= ext4_setattr,
+	.getattr	= ext4_getattr,
 	.listxattr	= ext4_listxattr,
 	.get_acl	= ext4_get_acl,
 	.set_acl	= ext4_set_acl,
@@ -3920,6 +3921,7 @@ const struct inode_operations ext4_dir_inode_operations = {
 
 const struct inode_operations ext4_special_inode_operations = {
 	.setattr	= ext4_setattr,
+	.getattr	= ext4_getattr,
 	.listxattr	= ext4_listxattr,
 	.get_acl	= ext4_get_acl,
 	.set_acl	= ext4_set_acl,
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 73b184d161fc..5c8fc53cb0e5 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -85,17 +85,20 @@ errout:
 const struct inode_operations ext4_encrypted_symlink_inode_operations = {
 	.get_link	= ext4_encrypted_get_link,
 	.setattr	= ext4_setattr,
+	.getattr	= ext4_getattr,
 	.listxattr	= ext4_listxattr,
 };
 
 const struct inode_operations ext4_symlink_inode_operations = {
 	.get_link	= page_get_link,
 	.setattr	= ext4_setattr,
+	.getattr	= ext4_getattr,
 	.listxattr	= ext4_listxattr,
 };
 
 const struct inode_operations ext4_fast_symlink_inode_operations = {
 	.get_link	= simple_get_link,
 	.setattr	= ext4_setattr,
+	.getattr	= ext4_getattr,
 	.listxattr	= ext4_listxattr,
 };

From 5f955f26f3d42d04aba65590a32eb70eedb7f37d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 31 Mar 2017 18:32:03 +0100
Subject: [PATCH 184/410] xfs: report crtime and attribute flags to statx

statx has the ability to report inode creation times and inode flags, so
hook up di_crtime and di_flags to that functionality.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/xfs/xfs_iops.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 229cc6a6d8ef..ebfc13350f9a 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -516,6 +516,20 @@ xfs_vn_getattr(
 	stat->blocks =
 		XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
 
+	if (ip->i_d.di_version == 3) {
+		if (request_mask & STATX_BTIME) {
+			stat->result_mask |= STATX_BTIME;
+			stat->btime.tv_sec = ip->i_d.di_crtime.t_sec;
+			stat->btime.tv_nsec = ip->i_d.di_crtime.t_nsec;
+		}
+	}
+
+	if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
+		stat->attributes |= STATX_ATTR_IMMUTABLE;
+	if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
+		stat->attributes |= STATX_ATTR_APPEND;
+	if (ip->i_d.di_flags & XFS_DIFLAG_NODUMP)
+		stat->attributes |= STATX_ATTR_NODUMP;
 
 	switch (inode->i_mode & S_IFMT) {
 	case S_IFBLK:

From 47071aee6a1956524b9929b3b821f6d2f8cae23c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 31 Mar 2017 18:32:10 +0100
Subject: [PATCH 185/410] statx: Reserve the top bit of the mask for future
 struct expansion

Reserve the top bit of the mask for future expansion of the statx struct
and give an error if statx() sees it set.  All the other bits are ignored
if we see them set but don't support the bit; we just clear the bit in the
returned mask.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/stat.c                 | 2 ++
 include/uapi/linux/stat.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/fs/stat.c b/fs/stat.c
index ab27f2868588..0c7e6cdc435c 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -562,6 +562,8 @@ SYSCALL_DEFINE5(statx,
 	struct kstat stat;
 	int error;
 
+	if (mask & STATX__RESERVED)
+		return -EINVAL;
 	if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE)
 		return -EINVAL;
 
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 51a6b86e3700..0869b9eaa8ce 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -152,6 +152,7 @@ struct statx {
 #define STATX_BASIC_STATS	0x000007ffU	/* The stuff in the normal stat struct */
 #define STATX_BTIME		0x00000800U	/* Want/got stx_btime */
 #define STATX_ALL		0x00000fffU	/* All currently supported flags */
+#define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */
 
 /*
  * Attributes to be found in stx_attributes

From 3209f68b3ca4667069923a325c88b21131bfdf9f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 31 Mar 2017 18:32:17 +0100
Subject: [PATCH 186/410] statx: Include a mask for stx_attributes in struct
 statx

Include a mask in struct stat to indicate which bits of stx_attributes the
filesystem actually supports.

This would also be useful if we add another system call that allows you to
do a 'bulk attribute set' and pass in a statx struct with the masks
appropriately set to say what you want to set.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext4/inode.c            |  6 ++++++
 fs/stat.c                  |  1 +
 include/linux/stat.h       |  1 +
 include/uapi/linux/stat.h  |  4 ++--
 samples/statx/test-statx.c | 12 ++++++++----
 5 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5d02b922afa3..b9ffa9f4191f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5413,6 +5413,12 @@ int ext4_getattr(const struct path *path, struct kstat *stat,
 	if (flags & EXT4_NODUMP_FL)
 		stat->attributes |= STATX_ATTR_NODUMP;
 
+	stat->attributes_mask |= (STATX_ATTR_APPEND |
+				  STATX_ATTR_COMPRESSED |
+				  STATX_ATTR_ENCRYPTED |
+				  STATX_ATTR_IMMUTABLE |
+				  STATX_ATTR_NODUMP);
+
 	generic_fillattr(inode, stat);
 	return 0;
 }
diff --git a/fs/stat.c b/fs/stat.c
index 0c7e6cdc435c..c6c963b2546b 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -527,6 +527,7 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
 	tmp.stx_ino = stat->ino;
 	tmp.stx_size = stat->size;
 	tmp.stx_blocks = stat->blocks;
+	tmp.stx_attributes_mask = stat->attributes_mask;
 	tmp.stx_atime.tv_sec = stat->atime.tv_sec;
 	tmp.stx_atime.tv_nsec = stat->atime.tv_nsec;
 	tmp.stx_btime.tv_sec = stat->btime.tv_sec;
diff --git a/include/linux/stat.h b/include/linux/stat.h
index c76e524fb34b..64b6b3aece21 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -26,6 +26,7 @@ struct kstat {
 	unsigned int	nlink;
 	uint32_t	blksize;	/* Preferred I/O size */
 	u64		attributes;
+	u64		attributes_mask;
 #define KSTAT_ATTR_FS_IOC_FLAGS				\
 	(STATX_ATTR_COMPRESSED |			\
 	 STATX_ATTR_IMMUTABLE |				\
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 0869b9eaa8ce..d538897b8e08 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -114,7 +114,7 @@ struct statx {
 	__u64	stx_ino;	/* Inode number */
 	__u64	stx_size;	/* File size */
 	__u64	stx_blocks;	/* Number of 512-byte blocks allocated */
-	__u64	__spare1[1];
+	__u64	stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
 	/* 0x40 */
 	struct statx_timestamp	stx_atime;	/* Last access time */
 	struct statx_timestamp	stx_btime;	/* File creation time */
@@ -155,7 +155,7 @@ struct statx {
 #define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */
 
 /*
- * Attributes to be found in stx_attributes
+ * Attributes to be found in stx_attributes and masked in stx_attributes_mask.
  *
  * These give information about the features or the state of a file that might
  * be of use to ordinary userspace programs such as GUIs or ls rather than
diff --git a/samples/statx/test-statx.c b/samples/statx/test-statx.c
index 8571d766331d..d4d77b09412c 100644
--- a/samples/statx/test-statx.c
+++ b/samples/statx/test-statx.c
@@ -141,8 +141,8 @@ static void dump_statx(struct statx *stx)
 	if (stx->stx_mask & STATX_BTIME)
 		print_time(" Birth: ", &stx->stx_btime);
 
-	if (stx->stx_attributes) {
-		unsigned char bits;
+	if (stx->stx_attributes_mask) {
+		unsigned char bits, mbits;
 		int loop, byte;
 
 		static char attr_representation[64 + 1] =
@@ -160,14 +160,18 @@ static void dump_statx(struct statx *stx)
 		printf("Attributes: %016llx (", stx->stx_attributes);
 		for (byte = 64 - 8; byte >= 0; byte -= 8) {
 			bits = stx->stx_attributes >> byte;
+			mbits = stx->stx_attributes_mask >> byte;
 			for (loop = 7; loop >= 0; loop--) {
 				int bit = byte + loop;
 
-				if (bits & 0x80)
+				if (!(mbits & 0x80))
+					putchar('.');	/* Not supported */
+				else if (bits & 0x80)
 					putchar(attr_representation[63 - bit]);
 				else
-					putchar('-');
+					putchar('-');	/* Not set */
 				bits <<= 1;
+				mbits <<= 1;
 			}
 			if (byte)
 				putchar(' ');

From 2b5efc089769cd2aa583880d29416d00e7441f39 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 25 Mar 2017 00:43:43 -0400
Subject: [PATCH 187/410] alpha: fix stack smashing in old_adjtimex(2)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/kernel/osf_sys.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 0b961093ca5c..6d76e528ab8f 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1290,7 +1290,7 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p)
 	/* copy relevant bits of struct timex. */
 	if (copy_from_user(&txc, txc_p, offsetof(struct timex32, time)) ||
 	    copy_from_user(&txc.tick, &txc_p->tick, sizeof(struct timex32) - 
-			   offsetof(struct timex32, time)))
+			   offsetof(struct timex32, tick)))
 	  return -EFAULT;
 
 	ret = do_adjtimex(&txc);	

From 232b8e3b1d4946a45e3b9dd4c282b12a085dd39d Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 31 Mar 2017 12:50:48 +0200
Subject: [PATCH 188/410] KVM: s390: remove change-recording override support

Change-recording override (CO) was never implemented in any
machine. According to the architecture it is unpredictable if a
translation-specification exception will be recognized if the bit is
set and EDAT1 does not apply.
Therefore the easiest solution is to simply ignore the bit.

This also fixes commit cd1836f583d7 ("KVM: s390:
instruction-execution-protection support"). A guest may enable
instruction-execution-protection (IEP) but not EDAT1. In such a case
the guest_translate() function (arch/s390/kvm/gaccess.c) will report a
specification exception on pages that have the IEP bit set while it
should not.

It might make sense to add full IEP support to guest_translate() and
the GACC_IFETCH case. However, as far as I can tell the GACC_IFETCH
case is currently only used after an instruction was executed in order
to fetch the failing instruction. So there is no additional problem
*currently*.

Fixes: cd1836f583d7 ("KVM: s390: instruction-execution-protection support")
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/gaccess.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index d55c829a5944..ddbffb715b40 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -168,8 +168,7 @@ union page_table_entry {
 		unsigned long z  : 1; /* Zero Bit */
 		unsigned long i  : 1; /* Page-Invalid Bit */
 		unsigned long p  : 1; /* DAT-Protection Bit */
-		unsigned long co : 1; /* Change-Recording Override */
-		unsigned long	 : 8;
+		unsigned long	 : 9;
 	};
 };
 
@@ -745,8 +744,6 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
 		return PGM_PAGE_TRANSLATION;
 	if (pte.z)
 		return PGM_TRANSLATION_SPEC;
-	if (pte.co && !edat1)
-		return PGM_TRANSLATION_SPEC;
 	dat_protection |= pte.p;
 	raddr.pfra = pte.pfra;
 real_address:
@@ -1182,7 +1179,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
 		rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
 	if (!rc && pte.i)
 		rc = PGM_PAGE_TRANSLATION;
-	if (!rc && (pte.z || (pte.co && sg->edat_level < 1)))
+	if (!rc && pte.z)
 		rc = PGM_TRANSLATION_SPEC;
 shadow_page:
 	pte.p |= dat_protection;

From 78420281a9d74014af7616958806c3aba056319e Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Mon, 3 Apr 2017 12:22:20 -0700
Subject: [PATCH 189/410] xfs: rework the inline directory verifiers

The inline directory verifiers should be called on the inode fork data,
which means after iformat_local on the read side, and prior to
ifork_flush on the write side.  This makes the fork verifier more
consistent with the way buffer verifiers work -- i.e. they will operate
on the memory buffer that the code will be reading and writing directly.

Furthermore, revise the verifier function to return -EFSCORRUPTED so
that we don't flood the logs with corruption messages and assert
notices.  This has been a particular problem with xfs/348, which
triggers the XFS_WANT_CORRUPTED_RETURN assertions, which halts the
kernel when CONFIG_XFS_DEBUG=y.  Disk corruption isn't supposed to do
that, at least not in a verifier.

Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_dir2_priv.h  |  3 +-
 fs/xfs/libxfs/xfs_dir2_sf.c    | 63 ++++++++++++++++++++++------------
 fs/xfs/libxfs/xfs_inode_fork.c | 35 +++++++------------
 fs/xfs/libxfs/xfs_inode_fork.h |  2 +-
 fs/xfs/xfs_inode.c             | 19 +++++-----
 5 files changed, 66 insertions(+), 56 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index eb00bc133bca..39f8604f764e 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -125,8 +125,7 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
 extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
 extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
 extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
-extern int xfs_dir2_sf_verify(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *sfp,
-		int size);
+extern int xfs_dir2_sf_verify(struct xfs_inode *ip);
 
 /* xfs_dir2_readdir.c */
 extern int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx,
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 96b45cd6c63f..e84af093b2ab 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -632,36 +632,49 @@ xfs_dir2_sf_check(
 /* Verify the consistency of an inline directory. */
 int
 xfs_dir2_sf_verify(
-	struct xfs_mount		*mp,
-	struct xfs_dir2_sf_hdr		*sfp,
-	int				size)
+	struct xfs_inode		*ip)
 {
+	struct xfs_mount		*mp = ip->i_mount;
+	struct xfs_dir2_sf_hdr		*sfp;
 	struct xfs_dir2_sf_entry	*sfep;
 	struct xfs_dir2_sf_entry	*next_sfep;
 	char				*endp;
 	const struct xfs_dir_ops	*dops;
+	struct xfs_ifork		*ifp;
 	xfs_ino_t			ino;
 	int				i;
 	int				i8count;
 	int				offset;
+	int				size;
+	int				error;
 	__uint8_t			filetype;
 
+	ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
+	/*
+	 * xfs_iread calls us before xfs_setup_inode sets up ip->d_ops,
+	 * so we can only trust the mountpoint to have the right pointer.
+	 */
 	dops = xfs_dir_get_ops(mp, NULL);
 
+	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+	sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data;
+	size = ifp->if_bytes;
+
 	/*
 	 * Give up if the directory is way too short.
 	 */
-	XFS_WANT_CORRUPTED_RETURN(mp, size >
-			offsetof(struct xfs_dir2_sf_hdr, parent));
-	XFS_WANT_CORRUPTED_RETURN(mp, size >=
-			xfs_dir2_sf_hdr_size(sfp->i8count));
+	if (size <= offsetof(struct xfs_dir2_sf_hdr, parent) ||
+	    size < xfs_dir2_sf_hdr_size(sfp->i8count))
+		return -EFSCORRUPTED;
 
 	endp = (char *)sfp + size;
 
 	/* Check .. entry */
 	ino = dops->sf_get_parent_ino(sfp);
 	i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
-	XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
+	error = xfs_dir_ino_validate(mp, ino);
+	if (error)
+		return error;
 	offset = dops->data_first_offset;
 
 	/* Check all reported entries */
@@ -672,12 +685,12 @@ xfs_dir2_sf_verify(
 		 * Check the fixed-offset parts of the structure are
 		 * within the data buffer.
 		 */
-		XFS_WANT_CORRUPTED_RETURN(mp,
-				((char *)sfep + sizeof(*sfep)) < endp);
+		if (((char *)sfep + sizeof(*sfep)) >= endp)
+			return -EFSCORRUPTED;
 
 		/* Don't allow names with known bad length. */
-		XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen > 0);
-		XFS_WANT_CORRUPTED_RETURN(mp, sfep->namelen < MAXNAMELEN);
+		if (sfep->namelen == 0)
+			return -EFSCORRUPTED;
 
 		/*
 		 * Check that the variable-length part of the structure is
@@ -685,33 +698,39 @@ xfs_dir2_sf_verify(
 		 * name component, so nextentry is an acceptable test.
 		 */
 		next_sfep = dops->sf_nextentry(sfp, sfep);
-		XFS_WANT_CORRUPTED_RETURN(mp, endp >= (char *)next_sfep);
+		if (endp < (char *)next_sfep)
+			return -EFSCORRUPTED;
 
 		/* Check that the offsets always increase. */
-		XFS_WANT_CORRUPTED_RETURN(mp,
-				xfs_dir2_sf_get_offset(sfep) >= offset);
+		if (xfs_dir2_sf_get_offset(sfep) < offset)
+			return -EFSCORRUPTED;
 
 		/* Check the inode number. */
 		ino = dops->sf_get_ino(sfp, sfep);
 		i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
-		XFS_WANT_CORRUPTED_RETURN(mp, !xfs_dir_ino_validate(mp, ino));
+		error = xfs_dir_ino_validate(mp, ino);
+		if (error)
+			return error;
 
 		/* Check the file type. */
 		filetype = dops->sf_get_ftype(sfep);
-		XFS_WANT_CORRUPTED_RETURN(mp, filetype < XFS_DIR3_FT_MAX);
+		if (filetype >= XFS_DIR3_FT_MAX)
+			return -EFSCORRUPTED;
 
 		offset = xfs_dir2_sf_get_offset(sfep) +
 				dops->data_entsize(sfep->namelen);
 
 		sfep = next_sfep;
 	}
-	XFS_WANT_CORRUPTED_RETURN(mp, i8count == sfp->i8count);
-	XFS_WANT_CORRUPTED_RETURN(mp, (void *)sfep == (void *)endp);
+	if (i8count != sfp->i8count)
+		return -EFSCORRUPTED;
+	if ((void *)sfep != (void *)endp)
+		return -EFSCORRUPTED;
 
 	/* Make sure this whole thing ought to be in local format. */
-	XFS_WANT_CORRUPTED_RETURN(mp, offset +
-	       (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
-	       (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dir_geo->blksize);
+	if (offset + (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+	    (uint)sizeof(xfs_dir2_block_tail_t) > mp->m_dir_geo->blksize)
+		return -EFSCORRUPTED;
 
 	return 0;
 }
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 9653e964eda4..8a37efe04de3 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -212,6 +212,16 @@ xfs_iformat_fork(
 	if (error)
 		return error;
 
+	/* Check inline dir contents. */
+	if (S_ISDIR(VFS_I(ip)->i_mode) &&
+	    dip->di_format == XFS_DINODE_FMT_LOCAL) {
+		error = xfs_dir2_sf_verify(ip);
+		if (error) {
+			xfs_idestroy_fork(ip, XFS_DATA_FORK);
+			return error;
+		}
+	}
+
 	if (xfs_is_reflink_inode(ip)) {
 		ASSERT(ip->i_cowfp == NULL);
 		xfs_ifork_init_cow(ip);
@@ -322,8 +332,6 @@ xfs_iformat_local(
 	int		whichfork,
 	int		size)
 {
-	int		error;
-
 	/*
 	 * If the size is unreasonable, then something
 	 * is wrong and we just bail out rather than crash in
@@ -339,14 +347,6 @@ xfs_iformat_local(
 		return -EFSCORRUPTED;
 	}
 
-	if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
-		error = xfs_dir2_sf_verify(ip->i_mount,
-				(struct xfs_dir2_sf_hdr *)XFS_DFORK_DPTR(dip),
-				size);
-		if (error)
-			return error;
-	}
-
 	xfs_init_local_fork(ip, whichfork, XFS_DFORK_PTR(dip, whichfork), size);
 	return 0;
 }
@@ -867,7 +867,7 @@ xfs_iextents_copy(
  * In these cases, the format always takes precedence, because the
  * format indicates the current state of the fork.
  */
-int
+void
 xfs_iflush_fork(
 	xfs_inode_t		*ip,
 	xfs_dinode_t		*dip,
@@ -877,7 +877,6 @@ xfs_iflush_fork(
 	char			*cp;
 	xfs_ifork_t		*ifp;
 	xfs_mount_t		*mp;
-	int			error;
 	static const short	brootflag[2] =
 		{ XFS_ILOG_DBROOT, XFS_ILOG_ABROOT };
 	static const short	dataflag[2] =
@@ -886,7 +885,7 @@ xfs_iflush_fork(
 		{ XFS_ILOG_DEXT, XFS_ILOG_AEXT };
 
 	if (!iip)
-		return 0;
+		return;
 	ifp = XFS_IFORK_PTR(ip, whichfork);
 	/*
 	 * This can happen if we gave up in iformat in an error path,
@@ -894,19 +893,12 @@ xfs_iflush_fork(
 	 */
 	if (!ifp) {
 		ASSERT(whichfork == XFS_ATTR_FORK);
-		return 0;
+		return;
 	}
 	cp = XFS_DFORK_PTR(dip, whichfork);
 	mp = ip->i_mount;
 	switch (XFS_IFORK_FORMAT(ip, whichfork)) {
 	case XFS_DINODE_FMT_LOCAL:
-		if (S_ISDIR(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
-			error = xfs_dir2_sf_verify(mp,
-					(struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data,
-					ifp->if_bytes);
-			if (error)
-				return error;
-		}
 		if ((iip->ili_fields & dataflag[whichfork]) &&
 		    (ifp->if_bytes > 0)) {
 			ASSERT(ifp->if_u1.if_data != NULL);
@@ -959,7 +951,6 @@ xfs_iflush_fork(
 		ASSERT(0);
 		break;
 	}
-	return 0;
 }
 
 /*
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 132dc59fdde6..7fb8365326d1 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -140,7 +140,7 @@ typedef struct xfs_ifork {
 struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state);
 
 int		xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
-int		xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
+void		xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
 				struct xfs_inode_log_item *, int);
 void		xfs_idestroy_fork(struct xfs_inode *, int);
 void		xfs_idata_realloc(struct xfs_inode *, int, int);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c7fe2c2123ab..7605d8396596 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -50,6 +50,7 @@
 #include "xfs_log.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_reflink.h"
+#include "xfs_dir2_priv.h"
 
 kmem_zone_t *xfs_inode_zone;
 
@@ -3475,7 +3476,6 @@ xfs_iflush_int(
 	struct xfs_inode_log_item *iip = ip->i_itemp;
 	struct xfs_dinode	*dip;
 	struct xfs_mount	*mp = ip->i_mount;
-	int			error;
 
 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
 	ASSERT(xfs_isiflocked(ip));
@@ -3547,6 +3547,12 @@ xfs_iflush_int(
 	if (ip->i_d.di_version < 3)
 		ip->i_d.di_flushiter++;
 
+	/* Check the inline directory data. */
+	if (S_ISDIR(VFS_I(ip)->i_mode) &&
+	    ip->i_d.di_format == XFS_DINODE_FMT_LOCAL &&
+	    xfs_dir2_sf_verify(ip))
+		goto corrupt_out;
+
 	/*
 	 * Copy the dirty parts of the inode into the on-disk inode.  We always
 	 * copy out the core of the inode, because if the inode is dirty at all
@@ -3558,14 +3564,9 @@ xfs_iflush_int(
 	if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
 		ip->i_d.di_flushiter = 0;
 
-	error = xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
-	if (error)
-		return error;
-	if (XFS_IFORK_Q(ip)) {
-		error = xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
-		if (error)
-			return error;
-	}
+	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
+	if (XFS_IFORK_Q(ip))
+		xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
 	xfs_inobp_check(mp, bp);
 
 	/*

From 3dd09d5a8589c640abb49cfcf92b4ed669eafad1 Mon Sep 17 00:00:00 2001
From: Calvin Owens <calvinowens@fb.com>
Date: Mon, 3 Apr 2017 12:22:29 -0700
Subject: [PATCH 190/410] xfs: Honor FALLOC_FL_KEEP_SIZE when punching ends of
 files

When punching past EOF on XFS, fallocate(mode=PUNCH_HOLE|KEEP_SIZE) will
round the file size up to the nearest multiple of PAGE_SIZE:

  calvinow@vm-disks/generic-xfs-1 ~$ dd if=/dev/urandom of=test bs=2048 count=1
  calvinow@vm-disks/generic-xfs-1 ~$ stat test
    Size: 2048            Blocks: 8          IO Block: 4096   regular file
  calvinow@vm-disks/generic-xfs-1 ~$ fallocate -n -l 2048 -o 2048 -p test
  calvinow@vm-disks/generic-xfs-1 ~$ stat test
    Size: 4096            Blocks: 8          IO Block: 4096   regular file

Commit 3c2bdc912a1cc050 ("xfs: kill xfs_zero_remaining_bytes") replaced
xfs_zero_remaining_bytes() with calls to iomap helpers. The new helpers
don't enforce that [pos,offset) lies strictly on [0,i_size) when being
called from xfs_free_file_space(), so by "leaking" these ranges into
xfs_zero_range() we get this buggy behavior.

Fix this by reintroducing the checks xfs_zero_remaining_bytes() did
against i_size at the bottom of xfs_free_file_space().

Reported-by: Aaron Gao <gzh@fb.com>
Fixes: 3c2bdc912a1cc050 ("xfs: kill xfs_zero_remaining_bytes")
Cc: Christoph Hellwig <hch@lst.de>
Cc: Brian Foster <bfoster@redhat.com>
Cc: <stable@vger.kernel.org> # 4.8+
Signed-off-by: Calvin Owens <calvinowens@fb.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_util.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 8b75dcea5966..828532ce0adc 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1311,8 +1311,16 @@ xfs_free_file_space(
 	/*
 	 * Now that we've unmap all full blocks we'll have to zero out any
 	 * partial block at the beginning and/or end.  xfs_zero_range is
-	 * smart enough to skip any holes, including those we just created.
+	 * smart enough to skip any holes, including those we just created,
+	 * but we must take care not to zero beyond EOF and enlarge i_size.
 	 */
+
+	if (offset >= XFS_ISIZE(ip))
+		return 0;
+
+	if (offset + len > XFS_ISIZE(ip))
+		len = XFS_ISIZE(ip) - offset;
+
 	return xfs_zero_range(ip, offset, len, NULL);
 }
 

From bf9216f922612d2db7666aae01e65064da2ffb3a Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Mon, 3 Apr 2017 12:22:39 -0700
Subject: [PATCH 191/410] xfs: fix kernel memory exposure problems

Fix a memory exposure problems in inumbers where we allocate an array of
structures with holes, fail to zero the holes, then blindly copy the
kernel memory contents (junk and all) into userspace.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_itable.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 2a6d9b1558e0..26d67ce3c18d 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -583,7 +583,7 @@ xfs_inumbers(
 		return error;
 
 	bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer)));
-	buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP);
+	buffer = kmem_zalloc(bcount * sizeof(*buffer), KM_SLEEP);
 	do {
 		struct xfs_inobt_rec_incore	r;
 		int				stat;

From 280489daa68bd20364b322c11e3f429a0212c611 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 13 Mar 2017 17:43:48 +0100
Subject: [PATCH 192/410] drm/msm: adreno: fix build error without debugfs

The newly added a5xx support fails to build when debugfs is diabled:

drivers/gpu/drm/msm/adreno/a5xx_gpu.c:849:4: error: 'struct msm_gpu_funcs' has no member named 'show'
drivers/gpu/drm/msm/adreno/a5xx_gpu.c:849:11: error: 'a5xx_show' undeclared here (not in a function); did you mean 'a5xx_irq'?

This adds a missing #ifdef.

Fixes: b5f103ab98c7 ("drm/msm: gpu: Add A5XX target support")
Cc: stable@vger.kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 4414cf73735d..f0c8bd74ca91 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -860,7 +860,9 @@ static const struct adreno_gpu_funcs funcs = {
 		.idle = a5xx_idle,
 		.irq = a5xx_irq,
 		.destroy = a5xx_destroy,
+#ifdef CONFIG_DEBUG_FS
 		.show = a5xx_show,
+#endif
 	},
 	.get_timestamp = a5xx_get_timestamp,
 };

From f456d348b6320a2597a3f1fff3ad0011c5678603 Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Tue, 7 Mar 2017 09:50:27 -0700
Subject: [PATCH 193/410] drm/msm: Fix wrong pointer check in a5xx_destroy

Instead of checking for a5xx_gpu->gpmu_iova during destroy we
accidently check a5xx_gpu->gpmu_bo.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index f0c8bd74ca91..36602ac7e248 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016 The Linux Foundation. All rights reserved.
+/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -534,7 +534,7 @@ static void a5xx_destroy(struct msm_gpu *gpu)
 	}
 
 	if (a5xx_gpu->gpmu_bo) {
-		if (a5xx_gpu->gpmu_bo)
+		if (a5xx_gpu->gpmu_iova)
 			msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->id);
 		drm_gem_object_unreference_unlocked(a5xx_gpu->gpmu_bo);
 	}

From 1a5dff5d74e55608a9632a1d030bb79196e0755c Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Tue, 7 Mar 2017 10:02:51 -0700
Subject: [PATCH 194/410] drm/msm: Don't allow zero sized buffer objects

Zero sized buffer objects tend to make various bits of the GEM
infrastructure complain:

 WARNING: CPU: 1 PID: 2323 at drivers/gpu/drm/drm_mm.c:389 drm_mm_insert_node_generic+0x258/0x2f0
 Modules linked in:

 CPU: 1 PID: 2323 Comm: drm-api-test Tainted: G        W 4.9.0-rc4-00906-g693af44 #213
 Hardware name: Qualcomm Technologies, Inc. DB820c (DT)
 task: ffff8000d7353400 task.stack: ffff8000d7720000
 PC is at drm_mm_insert_node_generic+0x258/0x2f0
 LR is at drm_vma_offset_add+0x4c/0x70

Zero sized buffers serve no appreciable value to the user so disallow
them at create time.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_gem.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 59811f29607d..68e509b3b9e4 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -812,6 +812,12 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev,
 
 	size = PAGE_ALIGN(size);
 
+	/* Disallow zero sized objects as they make the underlying
+	 * infrastructure grumpy
+	 */
+	if (size == 0)
+		return ERR_PTR(-EINVAL);
+
 	ret = msm_gem_new_impl(dev, size, flags, NULL, &obj);
 	if (ret)
 		goto fail;

From a5fef535c529b64b622f9d7eafb3ab86850f5f8f Mon Sep 17 00:00:00 2001
From: Archit Taneja <architt@codeaurora.org>
Date: Thu, 16 Feb 2017 16:29:04 +0530
Subject: [PATCH 195/410] drm/msm/dsi: Fix bug in dsi_mgr_phy_enable

A recent commit introduces a bug in dsi_mgr_phy_enable. In the non
dual DSI mode, we reset the mdsi (master DSI) PHY. This isn't right
since master and slave DSI exist only in dual DSI mode. For the normal
mode of operation, we should simply reset the PHY of the DSI device
(i.e. msm_dsi) corresponding to the current bridge.

Usage of the wrong DSI pointer also resulted in a static checker
warning. That too is resolved with this fix.

Fixes: b62aa70a98c5 (drm/msm/dsi: Move PHY operations out of host)

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Archit Taneja <architt@codeaurora.org>
Reviewed-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/dsi/dsi_manager.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c
index 921270ea6059..a879ffa534b4 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_manager.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c
@@ -171,7 +171,7 @@ dsi_mgr_phy_enable(int id,
 			}
 		}
 	} else {
-		msm_dsi_host_reset_phy(mdsi->host);
+		msm_dsi_host_reset_phy(msm_dsi->host);
 		ret = enable_phy(msm_dsi, src_pll_id, &shared_timings[id]);
 		if (ret)
 			return ret;

From 30512040ed8e1982e5ba16bb3e3a7f000ff65427 Mon Sep 17 00:00:00 2001
From: Archit Taneja <architt@codeaurora.org>
Date: Fri, 17 Mar 2017 09:09:48 +0530
Subject: [PATCH 196/410] drm/msm/mdp5: Update SSPP_MAX value

'SSPP_MAX + 1' is the max number of hwpipes that can be present on a
MDP5 platform. Recently, 2 new cursor hwpipes were added, which
caused overflows in arrays that used SSPP_MAX to represent the number
of elements. Update the SSPP_MAX value to incorporate the extra
hwpipes.

Signed-off-by: Archit Taneja <architt@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h
index 611da7a660c9..238901987e00 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h
@@ -18,7 +18,8 @@
 #ifndef __MDP5_PIPE_H__
 #define __MDP5_PIPE_H__
 
-#define SSPP_MAX	(SSPP_RGB3 + 1) /* TODO: Add SSPP_MAX in mdp5.xml.h */
+/* TODO: Add SSPP_MAX in mdp5.xml.h */
+#define SSPP_MAX	(SSPP_CURSOR1 + 1)
 
 /* represents a hw pipe, which is dynamically assigned to a plane */
 struct mdp5_hw_pipe {

From d322a693f585832130c3d09f2175b8f2b3ae99e1 Mon Sep 17 00:00:00 2001
From: Vinay Simha BN <simhavcs@gmail.com>
Date: Tue, 14 Mar 2017 10:55:56 +0530
Subject: [PATCH 197/410] drm/msm/hdmi: redefinitions of macros not required

4 macros already defined in hdmi.h,
which is not required to redefine in hdmi_audio.c

Signed-off-by: Vinay Simha BN <simhavcs@gmail.com>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/hdmi/hdmi_audio.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c
index a54d3bb5baad..8177e8511afd 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c
@@ -18,13 +18,6 @@
 #include <linux/hdmi.h>
 #include "hdmi.h"
 
-
-/* Supported HDMI Audio channels */
-#define MSM_HDMI_AUDIO_CHANNEL_2		0
-#define MSM_HDMI_AUDIO_CHANNEL_4		1
-#define MSM_HDMI_AUDIO_CHANNEL_6		2
-#define MSM_HDMI_AUDIO_CHANNEL_8		3
-
 /* maps MSM_HDMI_AUDIO_CHANNEL_n consts used by audio driver to # of channels: */
 static int nchannels[] = { 2, 4, 6, 8 };
 

From 028402d4bcfd3e99421504674cc41b0cd32768c8 Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Mon, 6 Feb 2017 10:39:29 -0700
Subject: [PATCH 198/410] drm/msm: Make sure to detach the MMU during GPU
 cleanup

We should be detaching the MMU before destroying the address
space. To do this cleanly, the detach has to happen in
adreno_gpu_cleanup() because it needs access to structs
in adreno_gpu.c.  Plus it is better symmetry to have
the attach and detach at the same code level.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 29 ++++++++++++++++---------
 drivers/gpu/drm/msm/msm_gpu.c           |  3 ---
 2 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index c9bd1e6225f4..5ae65426b4e5 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -418,18 +418,27 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 	return 0;
 }
 
-void adreno_gpu_cleanup(struct adreno_gpu *gpu)
+void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu)
 {
-	if (gpu->memptrs_bo) {
-		if (gpu->memptrs)
-			msm_gem_put_vaddr(gpu->memptrs_bo);
+	struct msm_gpu *gpu = &adreno_gpu->base;
 
-		if (gpu->memptrs_iova)
-			msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id);
+	if (adreno_gpu->memptrs_bo) {
+		if (adreno_gpu->memptrs)
+			msm_gem_put_vaddr(adreno_gpu->memptrs_bo);
 
-		drm_gem_object_unreference_unlocked(gpu->memptrs_bo);
+		if (adreno_gpu->memptrs_iova)
+			msm_gem_put_iova(adreno_gpu->memptrs_bo, gpu->id);
+
+		drm_gem_object_unreference_unlocked(adreno_gpu->memptrs_bo);
+	}
+	release_firmware(adreno_gpu->pm4);
+	release_firmware(adreno_gpu->pfp);
+
+	msm_gpu_cleanup(gpu);
+
+	if (gpu->aspace) {
+		gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu,
+			iommu_ports, ARRAY_SIZE(iommu_ports));
+		msm_gem_address_space_destroy(gpu->aspace);
 	}
-	release_firmware(gpu->pm4);
-	release_firmware(gpu->pfp);
-	msm_gpu_cleanup(&gpu->base);
 }
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 99e05aacbee1..af5b6ba4095b 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -706,9 +706,6 @@ void msm_gpu_cleanup(struct msm_gpu *gpu)
 		msm_ringbuffer_destroy(gpu->rb);
 	}
 
-	if (gpu->aspace)
-		msm_gem_address_space_destroy(gpu->aspace);
-
 	if (gpu->fctx)
 		msm_fence_context_free(gpu->fctx);
 }

From feb199ebef488a9f2c3550fb10524f3dac9d8abe Mon Sep 17 00:00:00 2001
From: Tomasz Nowicki <tn@semihalf.com>
Date: Fri, 31 Mar 2017 17:06:44 +0200
Subject: [PATCH 199/410] PCI: thunder-pem: Fix legacy firmware PEM-specific
 resources

SZ_16M PEM resource size includes PEM-specific register and its children
resources. Reservation of the whole SZ_16M range leads to child device
driver failure when pcieport driver is requesting resources:

  pcieport 0004:1f:00.0: can't enable device: BAR 0 [mem 0x87e0c0f00000-0x87e0c0ffffff 64bit] not claimed

So we cannot reserve full 16M here and instead we want to reserve
PEM-specific register only which is SZ_64K.

At the end increase PEM resource to SZ_16M since this is what
thunder_pem_init() call expects for proper initialization.

Fixes: 9abb27c7594a ("PCI: thunder-pem: Add legacy firmware support for Cavium ThunderX host controller")
Signed-off-by: Tomasz Nowicki <tn@semihalf.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
CC: stable@vger.kernel.org	# v4.10+
---
 drivers/pci/host/pci-thunder-pem.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c
index b89c373555c5..6e031b522529 100644
--- a/drivers/pci/host/pci-thunder-pem.c
+++ b/drivers/pci/host/pci-thunder-pem.c
@@ -375,7 +375,6 @@ static void thunder_pem_legacy_fw(struct acpi_pci_root *root,
 	index -= node * PEM_MAX_DOM_IN_NODE;
 	res_pem->start = PEM_RES_BASE | FIELD_PREP(PEM_NODE_MASK, node) |
 					FIELD_PREP(PEM_INDX_MASK, index);
-	res_pem->end = res_pem->start + SZ_16M - 1;
 	res_pem->flags = IORESOURCE_MEM;
 }
 
@@ -399,8 +398,15 @@ static int thunder_pem_acpi_init(struct pci_config_window *cfg)
 	 */
 	if (ret) {
 		thunder_pem_legacy_fw(root, res_pem);
-		/* Reserve PEM-specific resources and PCI configuration space */
+		/*
+		 * Reserve 64K size PEM specific resources. The full 16M range
+		 * size is required for thunder_pem_init() call.
+		 */
+		res_pem->end = res_pem->start + SZ_64K - 1;
 		thunder_pem_reserve_range(dev, root->segment, res_pem);
+		res_pem->end = res_pem->start + SZ_16M - 1;
+
+		/* Reserve PCI configuration space as well. */
 		thunder_pem_reserve_range(dev, root->segment, &cfg->res);
 	}
 

From 6665f8a307696a0edd4c1233b4cc0f5ed6083525 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 3 Apr 2017 16:17:11 -0500
Subject: [PATCH 200/410] PCI: dwc: Select PCI_HOST_COMMON for hisi

Without PCI_HOST_COMMON support enabled, we get a link error:

  drivers/pci/dwc/built-in.o: In function `hisi_pcie_map_bus':
  pcie-hisi.c:(.text+0x8860): undefined reference to `pci_ecam_map_bus'
  drivers/pci/dwc/built-in.o: In function `hisi_pcie_almost_ecam_probe':
  pcie-hisi.c:(.text+0x88b4): undefined reference to `pci_host_common_probe'

Add an explicit 'select', as the other users have.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Jingoo Han <jingoohan1@gmail.com>
---
 drivers/pci/dwc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pci/dwc/Kconfig b/drivers/pci/dwc/Kconfig
index dfb8a69afc28..d2d2ba5b8a68 100644
--- a/drivers/pci/dwc/Kconfig
+++ b/drivers/pci/dwc/Kconfig
@@ -89,6 +89,7 @@ config PCI_HISI
 	depends on PCI_MSI_IRQ_DOMAIN
 	select PCIEPORTBUS
 	select PCIE_DW_HOST
+	select PCI_HOST_COMMON
 	help
 	  Say Y here if you want PCIe controller support on HiSilicon
 	  Hip05 and Hip06 SoCs

From ac6a3722fed67c658a435187d0254ae119d845d3 Mon Sep 17 00:00:00 2001
From: Simon Horman <simon.horman@netronome.com>
Date: Mon, 3 Apr 2017 15:42:58 -0400
Subject: [PATCH 201/410] flow dissector: correct size of storage for ARP

The last argument to __skb_header_pointer() should be a buffer large
enough to store struct arphdr. This can be a pointer to a struct arphdr
structure. The code was previously using a pointer to a pointer to
struct arphdr.

By my counting the storage available both before and after is 8 bytes on
x86_64.

Fixes: 55733350e5e8 ("flow disector: ARP support")
Reported-by: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/flow_dissector.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index c35aae13c8d2..d98d4998213d 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -390,7 +390,7 @@ mpls:
 			unsigned char ar_tip[4];
 		} *arp_eth, _arp_eth;
 		const struct arphdr *arp;
-		struct arphdr *_arp;
+		struct arphdr _arp;
 
 		arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
 					   hlen, &_arp);

From df2729c3238ed89fb8ccf850d38c732858a5bade Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 1 Apr 2017 17:15:59 +0800
Subject: [PATCH 202/410] sctp: check for dst and pathmtu update in
 sctp_packet_config

This patch is to move sctp_transport_dst_check into sctp_packet_config
from sctp_packet_transmit and add pathmtu check in sctp_packet_config.

With this fix, sctp can update dst or pathmtu before appending chunks,
which can void dropping packets in sctp_packet_transmit when dst is
obsolete or dst's mtu is changed.

This patch is also to improve some other codes in sctp_packet_config.
It updates packet max_size with gso_max_size, checks for dst and
pathmtu, and appends ecne chunk only when packet is empty and asoc
is not NULL.

It makes sctp flush work better, as we only need to set up them once
for one flush schedule. It's also safe, since asoc is NULL only when
the packet is created by sctp_ootb_pkt_new in which it just gets the
new dst, no need to do more things for it other than set packet with
transport's pathmtu.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h | 17 +++++++++--
 net/sctp/output.c       | 67 ++++++++++++++++++++++-------------------
 2 files changed, 50 insertions(+), 34 deletions(-)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 1f71ee5ab518..d75caa7a629b 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -596,12 +596,23 @@ static inline void sctp_v4_map_v6(union sctp_addr *addr)
  */
 static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t)
 {
-	if (t->dst && (!dst_check(t->dst, t->dst_cookie) ||
-		       t->pathmtu != max_t(size_t, SCTP_TRUNC4(dst_mtu(t->dst)),
-					   SCTP_DEFAULT_MINSEGMENT)))
+	if (t->dst && !dst_check(t->dst, t->dst_cookie))
 		sctp_transport_dst_release(t);
 
 	return t->dst;
 }
 
+static inline bool sctp_transport_pmtu_check(struct sctp_transport *t)
+{
+	__u32 pmtu = max_t(size_t, SCTP_TRUNC4(dst_mtu(t->dst)),
+			   SCTP_DEFAULT_MINSEGMENT);
+
+	if (t->pathmtu == pmtu)
+		return true;
+
+	t->pathmtu = pmtu;
+
+	return false;
+}
+
 #endif /* __net_sctp_h__ */
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 73fd178007a3..ec4d50a38713 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -86,43 +86,53 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
 {
 	struct sctp_transport *tp = packet->transport;
 	struct sctp_association *asoc = tp->asoc;
+	struct sock *sk;
 
 	pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag);
-
 	packet->vtag = vtag;
 
-	if (asoc && tp->dst) {
-		struct sock *sk = asoc->base.sk;
+	/* do the following jobs only once for a flush schedule */
+	if (!sctp_packet_empty(packet))
+		return;
 
-		rcu_read_lock();
-		if (__sk_dst_get(sk) != tp->dst) {
-			dst_hold(tp->dst);
-			sk_setup_caps(sk, tp->dst);
-		}
+	/* set packet max_size with pathmtu */
+	packet->max_size = tp->pathmtu;
+	if (!asoc)
+		return;
 
-		if (sk_can_gso(sk)) {
-			struct net_device *dev = tp->dst->dev;
-
-			packet->max_size = dev->gso_max_size;
-		} else {
-			packet->max_size = asoc->pathmtu;
-		}
-		rcu_read_unlock();
-
-	} else {
-		packet->max_size = tp->pathmtu;
+	/* update dst or transport pathmtu if in need */
+	sk = asoc->base.sk;
+	if (!sctp_transport_dst_check(tp)) {
+		sctp_transport_route(tp, NULL, sctp_sk(sk));
+		if (asoc->param_flags & SPP_PMTUD_ENABLE)
+			sctp_assoc_sync_pmtu(sk, asoc);
+	} else if (!sctp_transport_pmtu_check(tp)) {
+		if (asoc->param_flags & SPP_PMTUD_ENABLE)
+			sctp_assoc_sync_pmtu(sk, asoc);
 	}
 
-	if (ecn_capable && sctp_packet_empty(packet)) {
-		struct sctp_chunk *chunk;
+	/* If there a is a prepend chunk stick it on the list before
+	 * any other chunks get appended.
+	 */
+	if (ecn_capable) {
+		struct sctp_chunk *chunk = sctp_get_ecne_prepend(asoc);
 
-		/* If there a is a prepend chunk stick it on the list before
-		 * any other chunks get appended.
-		 */
-		chunk = sctp_get_ecne_prepend(asoc);
 		if (chunk)
 			sctp_packet_append_chunk(packet, chunk);
 	}
+
+	if (!tp->dst)
+		return;
+
+	/* set packet max_size with gso_max_size if gso is enabled*/
+	rcu_read_lock();
+	if (__sk_dst_get(sk) != tp->dst) {
+		dst_hold(tp->dst);
+		sk_setup_caps(sk, tp->dst);
+	}
+	packet->max_size = sk_can_gso(sk) ? tp->dst->dev->gso_max_size
+					  : asoc->pathmtu;
+	rcu_read_unlock();
 }
 
 /* Initialize the packet structure. */
@@ -582,12 +592,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
 	sh->vtag = htonl(packet->vtag);
 	sh->checksum = 0;
 
-	/* update dst if in need */
-	if (!sctp_transport_dst_check(tp)) {
-		sctp_transport_route(tp, NULL, sctp_sk(sk));
-		if (asoc && asoc->param_flags & SPP_PMTUD_ENABLE)
-			sctp_assoc_sync_pmtu(sk, asoc);
-	}
+	/* drop packet if no dst */
 	dst = dst_clone(tp->dst);
 	if (!dst) {
 		IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);

From 0b9aefea860063bb39e36bd7fe6c7087fed0ba87 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Sat, 1 Apr 2017 11:00:21 -0300
Subject: [PATCH 203/410] tcp: minimize false-positives on TCP/GRO check

Markus Trippelsdorf reported that after commit dcb17d22e1c2 ("tcp: warn
on bogus MSS and try to amend it") the kernel started logging the
warning for a NIC driver that doesn't even support GRO.

It was diagnosed that it was possibly caused on connections that were
using TCP Timestamps but some packets lacked the Timestamps option. As
we reduce rcv_mss when timestamps are used, the lack of them would cause
the packets to be bigger than expected, although this is a valid case.

As this warning is more as a hint, getting a clean-cut on the
threshold is probably not worth the execution time spent on it. This
patch thus alleviates the false-positives with 2 quick checks: by
accounting for the entire TCP option space and also checking against the
interface MTU if it's available.

These changes, specially the MTU one, might mask some real positives,
though if they are really happening, it's possible that sooner or later
it will be triggered anyway.

Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c43119726a62..97ac6776e47d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -126,7 +126,8 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 #define REXMIT_LOST	1 /* retransmit packets marked lost */
 #define REXMIT_NEW	2 /* FRTO-style transmit of unsent/new packets */
 
-static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb)
+static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
+			     unsigned int len)
 {
 	static bool __once __read_mostly;
 
@@ -137,8 +138,9 @@ static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb)
 
 		rcu_read_lock();
 		dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
-		pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
-			dev ? dev->name : "Unknown driver");
+		if (!dev || len >= dev->mtu)
+			pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
+				dev ? dev->name : "Unknown driver");
 		rcu_read_unlock();
 	}
 }
@@ -161,8 +163,10 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
 	if (len >= icsk->icsk_ack.rcv_mss) {
 		icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
 					       tcp_sk(sk)->advmss);
-		if (unlikely(icsk->icsk_ack.rcv_mss != len))
-			tcp_gro_dev_warn(sk, skb);
+		/* Account for possibly-removed options */
+		if (unlikely(len > icsk->icsk_ack.rcv_mss +
+				   MAX_TCP_OPTION_SPACE))
+			tcp_gro_dev_warn(sk, skb, len);
 	} else {
 		/* Otherwise, we make more careful check taking into account,
 		 * that SACKs block is variable.

From 7bf8222b9bd0ba867e18b7f4537b61ef2e92eee8 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Mon, 3 Apr 2017 15:25:53 -0400
Subject: [PATCH 204/410] irq/affinity: Fix CPU spread for unbalanced nodes

The irq_create_affinity_masks routine is responsible for assigning a
number of interrupt vectors to CPUs. The optimal assignemnet will spread
requested vectors to all CPUs, with the fewest CPUs sharing a vector.

The algorithm may fail to assign some vectors to any CPUs if a node's
CPU count is lower than the average number of vectors per node. These
vectors are unusable and create an un-optimal spread.

Recalculate the number of vectors to assign at each node iteration by using
the remaining number of vectors and nodes to be assigned, not exceeding the
number of CPUs in that node. This will guarantee that every CPU is assigned
at least one vector.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: linux-nvme@lists.infradead.org
Link: http://lkml.kernel.org/r/1491247553-7603-1-git-send-email-keith.busch@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/irq/affinity.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 4544b115f5eb..dc529116f7e6 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -59,7 +59,7 @@ static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk)
 struct cpumask *
 irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
 {
-	int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec;
+	int n, nodes, cpus_per_vec, extra_vecs, curvec;
 	int affv = nvecs - affd->pre_vectors - affd->post_vectors;
 	int last_affv = affv + affd->pre_vectors;
 	nodemask_t nodemsk = NODE_MASK_NONE;
@@ -94,19 +94,21 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
 		goto done;
 	}
 
-	/* Spread the vectors per node */
-	vecs_per_node = affv / nodes;
-	/* Account for rounding errors */
-	extra_vecs = affv - (nodes * vecs_per_node);
-
 	for_each_node_mask(n, nodemsk) {
-		int ncpus, v, vecs_to_assign = vecs_per_node;
+		int ncpus, v, vecs_to_assign, vecs_per_node;
+
+		/* Spread the vectors per node */
+		vecs_per_node = (affv - curvec) / nodes;
 
 		/* Get the cpus on this node which are in the mask */
 		cpumask_and(nmsk, cpu_online_mask, cpumask_of_node(n));
 
 		/* Calculate the number of cpus per vector */
 		ncpus = cpumask_weight(nmsk);
+		vecs_to_assign = min(vecs_per_node, ncpus);
+
+		/* Account for rounding errors */
+		extra_vecs = ncpus - vecs_to_assign;
 
 		for (v = 0; curvec < last_affv && v < vecs_to_assign;
 		     curvec++, v++) {
@@ -115,14 +117,14 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
 			/* Account for extra vectors to compensate rounding errors */
 			if (extra_vecs) {
 				cpus_per_vec++;
-				if (!--extra_vecs)
-					vecs_per_node++;
+				--extra_vecs;
 			}
 			irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec);
 		}
 
 		if (curvec >= last_affv)
 			break;
+		--nodes;
 	}
 
 done:

From 09a6adf53d42ca3088fa3fb41f40b768efc711ed Mon Sep 17 00:00:00 2001
From: Victor Kamensky <kamensky@cisco.com>
Date: Mon, 3 Apr 2017 22:51:01 -0700
Subject: [PATCH 205/410] arm64: mm: unaligned access by user-land should be
 received as SIGBUS

After 52d7523 (arm64: mm: allow the kernel to handle alignment faults on
user accesses) commit user-land accesses that produce unaligned exceptions
like in case of aarch32 ldm/stm/ldrd/strd instructions operating on
unaligned memory received by user-land as SIGSEGV. It is wrong, it should
be reported as SIGBUS as it was before 52d7523 commit.

Changed do_bad_area function to take signal and code parameters out of esr
value using fault_info table, so in case of do_alignment_fault fault
user-land will receive SIGBUS. Wrapped access to fault_info table into
esr_to_fault_info function.

Cc: <stable@vger.kernel.org>
Fixes: 52d7523 (arm64: mm: allow the kernel to handle alignment faults on user accesses)
Signed-off-by: Victor Kamensky <kamensky@cisco.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/mm/fault.c | 42 ++++++++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 4bf899fb451b..1b35b8bddbfb 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -42,7 +42,20 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
-static const char *fault_name(unsigned int esr);
+struct fault_info {
+	int	(*fn)(unsigned long addr, unsigned int esr,
+		      struct pt_regs *regs);
+	int	sig;
+	int	code;
+	const char *name;
+};
+
+static const struct fault_info fault_info[];
+
+static inline const struct fault_info *esr_to_fault_info(unsigned int esr)
+{
+	return fault_info + (esr & 63);
+}
 
 #ifdef CONFIG_KPROBES
 static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
@@ -197,10 +210,12 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr,
 			    struct pt_regs *regs)
 {
 	struct siginfo si;
+	const struct fault_info *inf;
 
 	if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) {
+		inf = esr_to_fault_info(esr);
 		pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n",
-			tsk->comm, task_pid_nr(tsk), fault_name(esr), sig,
+			tsk->comm, task_pid_nr(tsk), inf->name, sig,
 			addr, esr);
 		show_pte(tsk->mm, addr);
 		show_regs(regs);
@@ -219,14 +234,16 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
 {
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->active_mm;
+	const struct fault_info *inf;
 
 	/*
 	 * If we are in kernel mode at this point, we have no context to
 	 * handle this fault with.
 	 */
-	if (user_mode(regs))
-		__do_user_fault(tsk, addr, esr, SIGSEGV, SEGV_MAPERR, regs);
-	else
+	if (user_mode(regs)) {
+		inf = esr_to_fault_info(esr);
+		__do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs);
+	} else
 		__do_kernel_fault(mm, addr, esr, regs);
 }
 
@@ -488,12 +505,7 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 	return 1;
 }
 
-static const struct fault_info {
-	int	(*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs);
-	int	sig;
-	int	code;
-	const char *name;
-} fault_info[] = {
+static const struct fault_info fault_info[] = {
 	{ do_bad,		SIGBUS,  0,		"ttbr address size fault"	},
 	{ do_bad,		SIGBUS,  0,		"level 1 address size fault"	},
 	{ do_bad,		SIGBUS,  0,		"level 2 address size fault"	},
@@ -560,19 +572,13 @@ static const struct fault_info {
 	{ do_bad,		SIGBUS,  0,		"unknown 63"			},
 };
 
-static const char *fault_name(unsigned int esr)
-{
-	const struct fault_info *inf = fault_info + (esr & 63);
-	return inf->name;
-}
-
 /*
  * Dispatch a data abort to the relevant handler.
  */
 asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
 					 struct pt_regs *regs)
 {
-	const struct fault_info *inf = fault_info + (esr & 63);
+	const struct fault_info *inf = esr_to_fault_info(esr);
 	struct siginfo info;
 
 	if (!inf->fn(addr, esr, regs))

From 8b3405e345b5a098101b0c31b264c812bba045d9 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Mon, 3 Apr 2017 15:12:43 +0100
Subject: [PATCH 206/410] kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd

In kvm_free_stage2_pgd() we don't hold the kvm->mmu_lock while calling
unmap_stage2_range() on the entire memory range for the guest. This could
cause problems with other callers (e.g, munmap on a memslot) trying to
unmap a range. And since we have to unmap the entire Guest memory range
holding a spinlock, make sure we yield the lock if necessary, after we
unmap each PUD range.

Fixes: commit d5d8184d35c9 ("KVM: ARM: Memory virtualization setup")
Cc: stable@vger.kernel.org # v3.10+
Cc: Paolo Bonzini <pbonzin@redhat.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
[ Avoid vCPU starvation and lockup detector warnings ]
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 arch/arm/kvm/mmu.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 13b9c1fa8961..582a972371cf 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -292,11 +292,18 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 	phys_addr_t addr = start, end = start + size;
 	phys_addr_t next;
 
+	assert_spin_locked(&kvm->mmu_lock);
 	pgd = kvm->arch.pgd + stage2_pgd_index(addr);
 	do {
 		next = stage2_pgd_addr_end(addr, end);
 		if (!stage2_pgd_none(*pgd))
 			unmap_stage2_puds(kvm, pgd, addr, next);
+		/*
+		 * If the range is too large, release the kvm->mmu_lock
+		 * to prevent starvation and lockup detector warnings.
+		 */
+		if (next != end)
+			cond_resched_lock(&kvm->mmu_lock);
 	} while (pgd++, addr = next, addr != end);
 }
 
@@ -831,7 +838,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 	if (kvm->arch.pgd == NULL)
 		return;
 
+	spin_lock(&kvm->mmu_lock);
 	unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
+	spin_unlock(&kvm->mmu_lock);
+
 	/* Free the HW pgd, one page at a time */
 	free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE);
 	kvm->arch.pgd = NULL;

From 5b0d2cc2805897c14257f6dbb949639c499c3c25 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sat, 18 Mar 2017 13:56:56 +0100
Subject: [PATCH 207/410] KVM: arm64: Ensure LRs are clear when they should be

We currently have some code to clear the list registers on GICv3, but we
never call this code, because the caller got nuked when removing the old
vgic.  We also used to have a similar GICv2 part, but that got lost in
the process too.

Let's reintroduce the logic for GICv2 and call the logic when we
initialize the use of hypervisors on the CPU, for example when first
loading KVM or when exiting a low power state.

Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/arm.c            |  3 +++
 include/kvm/arm_vgic.h        |  1 +
 virt/kvm/arm/vgic/vgic-init.c | 19 +++++++++++++++++++
 virt/kvm/arm/vgic/vgic-v2.c   | 15 +++++++++++++++
 virt/kvm/arm/vgic/vgic.h      |  2 ++
 5 files changed, 40 insertions(+)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 96dba7cd8be7..314eb6abe1ff 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -1124,6 +1124,9 @@ static void cpu_hyp_reinit(void)
 		if (__hyp_get_vectors() == hyp_default_vectors)
 			cpu_init_hyp_mode(NULL);
 	}
+
+	if (vgic_present)
+		kvm_vgic_init_cpu_hardware();
 }
 
 static void cpu_hyp_reset(void)
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index b72dd2ad5f44..c0b3d999c266 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -295,6 +295,7 @@ void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
 int kvm_vgic_map_resources(struct kvm *kvm);
 int kvm_vgic_hyp_init(void);
+void kvm_vgic_init_cpu_hardware(void);
 
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
 			bool level);
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 276139a24e6f..702f8108608d 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -391,6 +391,25 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+/**
+ * kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware
+ *
+ * For a specific CPU, initialize the GIC VE hardware.
+ */
+void kvm_vgic_init_cpu_hardware(void)
+{
+	BUG_ON(preemptible());
+
+	/*
+	 * We want to make sure the list registers start out clear so that we
+	 * only have the program the used registers.
+	 */
+	if (kvm_vgic_global_state.type == VGIC_V2)
+		vgic_v2_init_lrs();
+	else
+		kvm_call_hyp(__vgic_v3_init_lrs);
+}
+
 /**
  * kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable
  * according to the host GIC model. Accordingly calls either
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index b834ecdf3225..94cf4b9b6471 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -36,6 +36,21 @@ static unsigned long *u64_to_bitmask(u64 *val)
 	return (unsigned long *)val;
 }
 
+static inline void vgic_v2_write_lr(int lr, u32 val)
+{
+	void __iomem *base = kvm_vgic_global_state.vctrl_base;
+
+	writel_relaxed(val, base + GICH_LR0 + (lr * 4));
+}
+
+void vgic_v2_init_lrs(void)
+{
+	int i;
+
+	for (i = 0; i < kvm_vgic_global_state.nr_lr; i++)
+		vgic_v2_write_lr(i, 0);
+}
+
 void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu)
 {
 	struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index db28f7cadab2..91566f5aac9b 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -130,6 +130,8 @@ int vgic_v2_map_resources(struct kvm *kvm);
 int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
 			     enum vgic_type);
 
+void vgic_v2_init_lrs(void);
+
 static inline void vgic_get_irq_kref(struct vgic_irq *irq)
 {
 	if (irq->intid < VGIC_MIN_LPI)

From 6d56111c92d247bb64301029fe88365aa4caf16e Mon Sep 17 00:00:00 2001
From: Christoffer Dall <cdall@linaro.org>
Date: Tue, 21 Mar 2017 22:05:22 +0100
Subject: [PATCH 208/410] KVM: arm/arm64: vgic: Fix GICC_PMR uaccess on GICv3
 and clarify ABI

As an oversight, for GICv2, we accidentally export the GICC_PMR register
in the format of the GICH_VMCR.VMPriMask field in the lower 5 bits of a
word, meaning that userspace must always use the lower 5 bits to
communicate with the KVM device and must shift the value left by 3
places to obtain the actual priority mask level.

Since GICv3 supports the full 8 bits of priority masking in the ICH_VMCR,
we have to fix the value we export when emulating a GICv2 on top of a
hardware GICv3 and exporting the emulated GICv2 state to userspace.

Take the chance to clarify this aspect of the ABI.

Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <cdall@linaro.org>
---
 .../virtual/kvm/devices/arm-vgic.txt          |  6 ++++++
 include/linux/irqchip/arm-gic.h               |  3 +++
 virt/kvm/arm/vgic/vgic-mmio-v2.c              | 20 +++++++++++++++++--
 virt/kvm/arm/vgic/vgic-v2.c                   |  8 ++++----
 virt/kvm/arm/vgic/vgic.h                      |  9 ++++++++-
 5 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
index 76e61c883347..b2f60ca8b60c 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -83,6 +83,12 @@ Groups:
 
     Bits for undefined preemption levels are RAZ/WI.
 
+    For historical reasons and to provide ABI compatibility with userspace we
+    export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask
+    field in the lower 5 bits of a word, meaning that userspace must always
+    use the lower 5 bits to communicate with the KVM device and must shift the
+    value left by 3 places to obtain the actual priority mask level.
+
   Limitations:
     - Priorities are not implemented, and registers are RAZ/WI
     - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index eafc965b3eb8..dc30f3d057eb 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -96,6 +96,9 @@
 #define GICH_MISR_EOI			(1 << 0)
 #define GICH_MISR_U			(1 << 1)
 
+#define GICV_PMR_PRIORITY_SHIFT		3
+#define GICV_PMR_PRIORITY_MASK		(0x1f << GICV_PMR_PRIORITY_SHIFT)
+
 #ifndef __ASSEMBLY__
 
 #include <linux/irqdomain.h>
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
index a3ad7ff95c9b..0a4283ed9aa7 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -229,7 +229,15 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
 		val = vmcr.ctlr;
 		break;
 	case GIC_CPU_PRIMASK:
-		val = vmcr.pmr;
+		/*
+		 * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
+		 * the PMR field as GICH_VMCR.VMPriMask rather than
+		 * GICC_PMR.Priority, so we expose the upper five bits of
+		 * priority mask to userspace using the lower bits in the
+		 * unsigned long.
+		 */
+		val = (vmcr.pmr & GICV_PMR_PRIORITY_MASK) >>
+			GICV_PMR_PRIORITY_SHIFT;
 		break;
 	case GIC_CPU_BINPOINT:
 		val = vmcr.bpr;
@@ -262,7 +270,15 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
 		vmcr.ctlr = val;
 		break;
 	case GIC_CPU_PRIMASK:
-		vmcr.pmr = val;
+		/*
+		 * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
+		 * the PMR field as GICH_VMCR.VMPriMask rather than
+		 * GICC_PMR.Priority, so we expose the upper five bits of
+		 * priority mask to userspace using the lower bits in the
+		 * unsigned long.
+		 */
+		vmcr.pmr = (val << GICV_PMR_PRIORITY_SHIFT) &
+			GICV_PMR_PRIORITY_MASK;
 		break;
 	case GIC_CPU_BINPOINT:
 		vmcr.bpr = val;
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 94cf4b9b6471..b637d9c7afe3 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -206,8 +206,8 @@ void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 		GICH_VMCR_ALIAS_BINPOINT_MASK;
 	vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) &
 		GICH_VMCR_BINPOINT_MASK;
-	vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) &
-		GICH_VMCR_PRIMASK_MASK;
+	vmcr |= ((vmcrp->pmr >> GICV_PMR_PRIORITY_SHIFT) <<
+		 GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;
 
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
 }
@@ -222,8 +222,8 @@ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
 			GICH_VMCR_ALIAS_BINPOINT_SHIFT;
 	vmcrp->bpr  = (vmcr & GICH_VMCR_BINPOINT_MASK) >>
 			GICH_VMCR_BINPOINT_SHIFT;
-	vmcrp->pmr  = (vmcr & GICH_VMCR_PRIMASK_MASK) >>
-			GICH_VMCR_PRIMASK_SHIFT;
+	vmcrp->pmr  = ((vmcr & GICH_VMCR_PRIMASK_MASK) >>
+			GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT;
 }
 
 void vgic_v2_enable(struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 91566f5aac9b..6cf557e9f718 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -81,11 +81,18 @@ static inline bool irq_is_pending(struct vgic_irq *irq)
 		return irq->pending_latch || irq->line_level;
 }
 
+/*
+ * This struct provides an intermediate representation of the fields contained
+ * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC
+ * state to userspace can generate either GICv2 or GICv3 CPU interface
+ * registers regardless of the hardware backed GIC used.
+ */
 struct vgic_vmcr {
 	u32	ctlr;
 	u32	abpr;
 	u32	bpr;
-	u32	pmr;
+	u32	pmr;  /* Priority mask field in the GICC_PMR and
+		       * ICC_PMR_EL1 priority field format */
 	/* Below member variable are valid only for GICv3 */
 	u32	grpen0;
 	u32	grpen1;

From 48fe9e9488743eec9b7c1addd3c93f12f2123d54 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Tue, 4 Apr 2017 14:56:05 +1000
Subject: [PATCH 209/410] powerpc: Don't try to fix up misaligned
 load-with-reservation instructions

In the past, there was only one load-with-reservation instruction,
lwarx, and if a program attempted a lwarx on a misaligned address, it
would take an alignment interrupt and the kernel handler would emulate
it as though it was lwzx, which was not really correct, but benign since
it is loading the right amount of data, and the lwarx should be paired
with a stwcx. to the same address, which would also cause an alignment
interrupt which would result in a SIGBUS being delivered to the process.

We now have 5 different sizes of load-with-reservation instruction. Of
those, lharx and ldarx cause an immediate SIGBUS by luck since their
entries in aligninfo[] overlap instructions which were not fixed up, but
lqarx overlaps with lhz and will be emulated as such. lbarx can never
generate an alignment interrupt since it only operates on 1 byte.

To straighten this out and fix the lqarx case, this adds code to detect
the l[hwdq]arx instructions and return without fixing them up, resulting
in a SIGBUS being delivered to the process.

Cc: stable@vger.kernel.org
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/align.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index cbc7c42cdb74..ec7a8b099dd9 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -807,14 +807,25 @@ int fix_alignment(struct pt_regs *regs)
 	nb = aligninfo[instr].len;
 	flags = aligninfo[instr].flags;
 
-	/* ldbrx/stdbrx overlap lfs/stfs in the DSISR unfortunately */
-	if (IS_XFORM(instruction) && ((instruction >> 1) & 0x3ff) == 532) {
-		nb = 8;
-		flags = LD+SW;
-	} else if (IS_XFORM(instruction) &&
-		   ((instruction >> 1) & 0x3ff) == 660) {
-		nb = 8;
-		flags = ST+SW;
+	/*
+	 * Handle some cases which give overlaps in the DSISR values.
+	 */
+	if (IS_XFORM(instruction)) {
+		switch (get_xop(instruction)) {
+		case 532:	/* ldbrx */
+			nb = 8;
+			flags = LD+SW;
+			break;
+		case 660:	/* stdbrx */
+			nb = 8;
+			flags = ST+SW;
+			break;
+		case 20:	/* lwarx */
+		case 84:	/* ldarx */
+		case 116:	/* lharx */
+		case 276:	/* lqarx */
+			return 0;	/* not emulated ever */
+		}
 	}
 
 	/* Byteswap little endian loads and stores */

From 794a8604fe6e4a311373cde57a86ad4aab9d32b8 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@axis.com>
Date: Mon, 3 Apr 2017 17:35:12 -0500
Subject: [PATCH 210/410] PCI: dwc: Fix dw_pcie_ops NULL pointer dereference

Fix a crash from dereferencing a NULL dw_pcie_ops pointer.  For example,
on ARTPEC-6:

  Unable to handle kernel NULL pointer dereference at virtual address 00000004
  pgd = c0204000
  [00000004] *pgd=00000000
  Internal error: Oops: 5 [#1] SMP ARM
  Modules linked in:
  CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.11.0-rc3-next-20170321 #1
  Hardware name: Axis ARTPEC-6 Platform
  task: db098000 task.stack: db096000
  PC is at dw_pcie_writel_dbi+0x2c/0xd0

Prior to 442ec4c04d12 ("PCI: dwc: all: Split struct pcie_port into
host-only and core structures"), every driver had a struct pcie_host_ops
with function pointers, typically used as:

  if (pp->ops->readl_rc)
    return pp->ops->readl_rc(...);

442ec4c04d12 split struct pcie_host_ops into two pieces: struct
dw_pcie_host_ops and struct dw_pcie_ops, so the above became:

  if (pci->ops->readl_dbi)
    return pci->ops->readl_dbi(...);

But pcie-artpec6.c and pcie-designware-plat.c don't need the dw_pcie_ops
pointers and didn't supply a pci->ops struct, which leads to NULL pointer
dereferences.

Supply an empty struct dw_pcie_ops to avoid the NULL pointer dereferences.

[bhelgaas: changelog]
Fixes: 442ec4c04d12 ("PCI: dwc: all: Split struct pcie_port into host-only and core structures")
Signed-off-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
Acked-by: Joao Pinto <jpinto@synopsys.com>
---
 drivers/pci/dwc/pcie-artpec6.c         | 4 ++++
 drivers/pci/dwc/pcie-designware-plat.c | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/drivers/pci/dwc/pcie-artpec6.c b/drivers/pci/dwc/pcie-artpec6.c
index fcd3ef845883..6d23683c0892 100644
--- a/drivers/pci/dwc/pcie-artpec6.c
+++ b/drivers/pci/dwc/pcie-artpec6.c
@@ -234,6 +234,9 @@ static int artpec6_add_pcie_port(struct artpec6_pcie *artpec6_pcie,
 	return 0;
 }
 
+static const struct dw_pcie_ops dw_pcie_ops = {
+};
+
 static int artpec6_pcie_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -252,6 +255,7 @@ static int artpec6_pcie_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	pci->dev = dev;
+	pci->ops = &dw_pcie_ops;
 
 	artpec6_pcie->pci = pci;
 
diff --git a/drivers/pci/dwc/pcie-designware-plat.c b/drivers/pci/dwc/pcie-designware-plat.c
index b6c832ba39dd..f20d494922ab 100644
--- a/drivers/pci/dwc/pcie-designware-plat.c
+++ b/drivers/pci/dwc/pcie-designware-plat.c
@@ -86,6 +86,9 @@ static int dw_plat_add_pcie_port(struct pcie_port *pp,
 	return 0;
 }
 
+static const struct dw_pcie_ops dw_pcie_ops = {
+};
+
 static int dw_plat_pcie_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -103,6 +106,7 @@ static int dw_plat_pcie_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	pci->dev = dev;
+	pci->ops = &dw_pcie_ops;
 
 	dw_plat_pcie->pci = pci;
 

From ab007cc94ff9d82f5a8db8363b3becbd946e58cf Mon Sep 17 00:00:00 2001
From: Ladi Prosek <lprosek@redhat.com>
Date: Fri, 31 Mar 2017 10:19:26 +0200
Subject: [PATCH 211/410] KVM: nVMX: do not leak PML full vmexit to L1
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PML feature is not exposed to guests so we should not be forwarding
the vmexit either.

This commit fixes BSOD 0x20001 (HYPERVISOR_ERROR) when running Hyper-V
enabled Windows Server 2016 in L1 on hardware that supports PML.

Fixes: 843e4330573c ("KVM: VMX: Add PML support in VMX")
Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2ee00dbbbd51..605183291069 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8198,6 +8198,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
 	case EXIT_REASON_PREEMPTION_TIMER:
 		return false;
+	case EXIT_REASON_PML_FULL:
+		/* We don't expose PML support to L1. */
+		return false;
 	default:
 		return true;
 	}

From ac4cde398a96c1d28b1c28a0f69b6efd892a1c8a Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Tue, 4 Apr 2017 06:27:22 -0600
Subject: [PATCH 212/410] xenbus: remove transaction holder from list before
 freeing

After allocation the item is being placed on the list right away.
Consequently it needs to be taken off the list before freeing in the
case xenbus_dev_request_and_reply() failed, as in that case the
callback (xenbus_dev_queue_reply()) is not being called (and if it
was called, it should do both).

Fixes: 5584ea250ae44f929feb4c7bd3877d1c5edbf813
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 drivers/xen/xenbus/xenbus_dev_frontend.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index 1f4733b80c87..f3b089b7c0b6 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -442,8 +442,10 @@ static int xenbus_write_transaction(unsigned msg_type,
 		return xenbus_command_reply(u, XS_ERROR, "ENOENT");
 
 	rc = xenbus_dev_request_and_reply(&u->u.msg, u);
-	if (rc)
+	if (rc && trans) {
+		list_del(&trans->list);
 		kfree(trans);
+	}
 
 out:
 	return rc;

From 1fb883bb827ee8efc1cc9ea0154f953f8a219d38 Mon Sep 17 00:00:00 2001
From: Ladi Prosek <lprosek@redhat.com>
Date: Tue, 4 Apr 2017 14:18:53 +0200
Subject: [PATCH 213/410] KVM: nVMX: initialize PML fields in vmcs02
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

L2 was running with uninitialized PML fields which led to incomplete
dirty bitmap logging. This manifested as all kinds of subtle erratic
behavior of the nested guest.

Fixes: 843e4330573c ("KVM: VMX: Add PML support in VMX")
Signed-off-by: Ladi Prosek <lprosek@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 605183291069..259e9b28ccf8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10270,6 +10270,18 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 
 	}
 
+	if (enable_pml) {
+		/*
+		 * Conceptually we want to copy the PML address and index from
+		 * vmcs01 here, and then back to vmcs01 on nested vmexit. But,
+		 * since we always flush the log on each vmexit, this happens
+		 * to be equivalent to simply resetting the fields in vmcs02.
+		 */
+		ASSERT(vmx->pml_pg);
+		vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
+		vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+	}
+
 	if (nested_cpu_has_ept(vmcs12)) {
 		kvm_mmu_unload(vcpu);
 		nested_ept_init_mmu_context(vcpu);

From 62e24c5775ecb387a3eb33701378ccfa6dbc98ee Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Fri, 5 Feb 2016 13:41:39 +0100
Subject: [PATCH 214/410] reset: add exported __reset_control_get, return NULL
 if optional

Rename the internal __reset_control_get/put functions to
__reset_control_get/put_internal and add an exported
__reset_control_get equivalent to __of_reset_control_get
that takes a struct device parameter.
This avoids the confusing call to __of_reset_control_get in
the non-DT case and fixes the devm_reset_control_get_optional
function to return NULL if RESET_CONTROLLER is enabled but
dev->of_node == NULL.

Fixes: bb475230b8e5 ("reset: make optional functions really optional")
Reported-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Ramiro Oliveira <Ramiro.Oliveira@synopsys.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/reset/core.c  | 22 ++++++++++++++++------
 include/linux/reset.h | 22 ++++++++++++++--------
 2 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/drivers/reset/core.c b/drivers/reset/core.c
index f1e5e65388bb..cd739d2fa160 100644
--- a/drivers/reset/core.c
+++ b/drivers/reset/core.c
@@ -275,7 +275,7 @@ int reset_control_status(struct reset_control *rstc)
 }
 EXPORT_SYMBOL_GPL(reset_control_status);
 
-static struct reset_control *__reset_control_get(
+static struct reset_control *__reset_control_get_internal(
 				struct reset_controller_dev *rcdev,
 				unsigned int index, bool shared)
 {
@@ -308,7 +308,7 @@ static struct reset_control *__reset_control_get(
 	return rstc;
 }
 
-static void __reset_control_put(struct reset_control *rstc)
+static void __reset_control_put_internal(struct reset_control *rstc)
 {
 	lockdep_assert_held(&reset_list_mutex);
 
@@ -377,7 +377,7 @@ struct reset_control *__of_reset_control_get(struct device_node *node,
 	}
 
 	/* reset_list_mutex also protects the rcdev's reset_control list */
-	rstc = __reset_control_get(rcdev, rstc_id, shared);
+	rstc = __reset_control_get_internal(rcdev, rstc_id, shared);
 
 	mutex_unlock(&reset_list_mutex);
 
@@ -385,6 +385,17 @@ struct reset_control *__of_reset_control_get(struct device_node *node,
 }
 EXPORT_SYMBOL_GPL(__of_reset_control_get);
 
+struct reset_control *__reset_control_get(struct device *dev, const char *id,
+					  int index, bool shared, bool optional)
+{
+	if (dev->of_node)
+		return __of_reset_control_get(dev->of_node, id, index, shared,
+					      optional);
+
+	return optional ? NULL : ERR_PTR(-EINVAL);
+}
+EXPORT_SYMBOL_GPL(__reset_control_get);
+
 /**
  * reset_control_put - free the reset controller
  * @rstc: reset controller
@@ -396,7 +407,7 @@ void reset_control_put(struct reset_control *rstc)
 		return;
 
 	mutex_lock(&reset_list_mutex);
-	__reset_control_put(rstc);
+	__reset_control_put_internal(rstc);
 	mutex_unlock(&reset_list_mutex);
 }
 EXPORT_SYMBOL_GPL(reset_control_put);
@@ -417,8 +428,7 @@ struct reset_control *__devm_reset_control_get(struct device *dev,
 	if (!ptr)
 		return ERR_PTR(-ENOMEM);
 
-	rstc = __of_reset_control_get(dev ? dev->of_node : NULL,
-				      id, index, shared, optional);
+	rstc = __reset_control_get(dev, id, index, shared, optional);
 	if (!IS_ERR(rstc)) {
 		*ptr = rstc;
 		devres_add(dev, ptr);
diff --git a/include/linux/reset.h b/include/linux/reset.h
index 96fb139bdd08..13d8681210d5 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -15,6 +15,9 @@ int reset_control_status(struct reset_control *rstc);
 struct reset_control *__of_reset_control_get(struct device_node *node,
 				     const char *id, int index, bool shared,
 				     bool optional);
+struct reset_control *__reset_control_get(struct device *dev, const char *id,
+					  int index, bool shared,
+					  bool optional);
 void reset_control_put(struct reset_control *rstc);
 struct reset_control *__devm_reset_control_get(struct device *dev,
 				     const char *id, int index, bool shared,
@@ -72,6 +75,13 @@ static inline struct reset_control *__of_reset_control_get(
 	return optional ? NULL : ERR_PTR(-ENOTSUPP);
 }
 
+static inline struct reset_control *__reset_control_get(
+					struct device *dev, const char *id,
+					int index, bool shared, bool optional)
+{
+	return optional ? NULL : ERR_PTR(-ENOTSUPP);
+}
+
 static inline struct reset_control *__devm_reset_control_get(
 					struct device *dev, const char *id,
 					int index, bool shared, bool optional)
@@ -102,8 +112,7 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id)
 #ifndef CONFIG_RESET_CONTROLLER
 	WARN_ON(1);
 #endif
-	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false,
-									false);
+	return __reset_control_get(dev, id, 0, false, false);
 }
 
 /**
@@ -131,22 +140,19 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id)
 static inline struct reset_control *reset_control_get_shared(
 					struct device *dev, const char *id)
 {
-	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true,
-									false);
+	return __reset_control_get(dev, id, 0, true, false);
 }
 
 static inline struct reset_control *reset_control_get_optional_exclusive(
 					struct device *dev, const char *id)
 {
-	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false,
-									true);
+	return __reset_control_get(dev, id, 0, false, true);
 }
 
 static inline struct reset_control *reset_control_get_optional_shared(
 					struct device *dev, const char *id)
 {
-	return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true,
-									true);
+	return __reset_control_get(dev, id, 0, true, true);
 }
 
 /**

From 57c1d4c33e8f7ec90976d79127059c1919cc0651 Mon Sep 17 00:00:00 2001
From: Markus Marb <markus@marb.org>
Date: Fri, 17 Mar 2017 23:14:47 +0100
Subject: [PATCH 215/410] can: ifi: use correct register to read rx status

The incorrect offset was used when trying to read the RXSTCMD register.

Signed-off-by: Markus Marb <markus@marb.org>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/ifi_canfd/ifi_canfd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c
index 138f5ae75c0b..4d1fe8d95042 100644
--- a/drivers/net/can/ifi_canfd/ifi_canfd.c
+++ b/drivers/net/can/ifi_canfd/ifi_canfd.c
@@ -557,7 +557,7 @@ static int ifi_canfd_poll(struct napi_struct *napi, int quota)
 	int work_done = 0;
 
 	u32 stcmd = readl(priv->base + IFI_CANFD_STCMD);
-	u32 rxstcmd = readl(priv->base + IFI_CANFD_STCMD);
+	u32 rxstcmd = readl(priv->base + IFI_CANFD_RXSTCMD);
 	u32 errctr = readl(priv->base + IFI_CANFD_ERROR_CTR);
 
 	/* Handle bus state changes */

From 04abaf07f6d5cdf22b7a478a86e706dfeeeef960 Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Thu, 30 Mar 2017 14:58:18 -0500
Subject: [PATCH 216/410] ARM: OMAP2+: omap_device: Sync omap_device and
 pm_runtime after probe defer

Starting from commit 5de85b9d57ab ("PM / runtime: Re-init runtime PM
states at probe error and driver unbind") pm_runtime core now changes
device runtime_status back to after RPM_SUSPENDED after a probe defer.
Certain OMAP devices make use of "ti,no-idle-on-init" flag which causes
omap_device_enable to be called during the BUS_NOTIFY_ADD_DEVICE event
during probe, along with pm_runtime_set_active.

This call to pm_runtime_set_active typically will prevent a call to
pm_runtime_get in a driver probe function from re-enabling the
omap_device. However, in the case of a probe defer that happens before
the driver probe function is able to run, such as a missing pinctrl
states defer, pm_runtime_reinit will set the device as RPM_SUSPENDED and
then once driver probe is actually able to run, pm_runtime_get will see
the device as suspended and call through to the omap_device layer,
attempting to enable the already enabled omap_device and causing errors
like this:

omap-gpmc 50000000.gpmc: omap_device: omap_device_enable() called from
invalid state 1
omap-gpmc 50000000.gpmc: use pm_runtime_put_sync_suspend() in driver?

We can avoid this error by making sure the pm_runtime status of a device
matches the omap_device state before a probe attempt. By extending the
omap_device bus notifier to act on the BUS_NOTIFY_BIND_DRIVER event we
can check if a device is enabled in omap_device but with a pm_runtime
status of RPM_SUSPENDED and once again mark the device as RPM_ACTIVE to
avoid a second incorrect call to omap_device_enable.

Fixes: 5de85b9d57ab ("PM / runtime: Re-init runtime PM states at probe
error and driver unbind")
Tested-by: Franklin S Cooper Jr. <fcooper@ti.com>
Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/omap_device.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index e920dd83e443..f989145480c8 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -222,6 +222,14 @@ static int _omap_device_notifier_call(struct notifier_block *nb,
 				dev_err(dev, "failed to idle\n");
 		}
 		break;
+	case BUS_NOTIFY_BIND_DRIVER:
+		od = to_omap_device(pdev);
+		if (od && (od->_state == OMAP_DEVICE_STATE_ENABLED) &&
+		    pm_runtime_status_suspended(dev)) {
+			od->_driver_status = BUS_NOTIFY_BIND_DRIVER;
+			pm_runtime_set_active(dev);
+		}
+		break;
 	case BUS_NOTIFY_ADD_DEVICE:
 		if (pdev->dev.of_node)
 			omap_device_build_from_dt(pdev);

From ca257b9e2d807ab6cb2678ecc7b74aaf4651f597 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 3 Apr 2017 12:11:26 +0200
Subject: [PATCH 217/410] can: rcar_can: Do not print virtual addresses

During probe, the rcar_can driver prints:

    rcar_can e6e80000.can: device registered (regs @ e08bc000, IRQ76)

The "regs" value is a virtual address, exposing internal information,
hence stop printing it.  The (useful) physical address is already
printed as part of the device name.

Fixes: fd1159318e55e901 ("can: add Renesas R-Car CAN driver")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/rcar/rcar_can.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c
index caed4e6960f8..11662f479e76 100644
--- a/drivers/net/can/rcar/rcar_can.c
+++ b/drivers/net/can/rcar/rcar_can.c
@@ -826,8 +826,7 @@ static int rcar_can_probe(struct platform_device *pdev)
 
 	devm_can_led_init(ndev);
 
-	dev_info(&pdev->dev, "device registered (regs @ %p, IRQ%d)\n",
-		 priv->regs, ndev->irq);
+	dev_info(&pdev->dev, "device registered (IRQ%d)\n", ndev->irq);
 
 	return 0;
 fail_candev:

From e08293a4ccbcc993ded0fdc46f1e57926b833d63 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Mon, 3 Apr 2017 12:03:13 +0200
Subject: [PATCH 218/410] l2tp: take reference on sessions being dumped

Take a reference on the sessions returned by l2tp_session_find_nth()
(and rename it l2tp_session_get_nth() to reflect this change), so that
caller is assured that the session isn't going to disappear while
processing it.

For procfs and debugfs handlers, the session is held in the .start()
callback and dropped in .show(). Given that pppol2tp_seq_session_show()
dereferences the associated PPPoL2TP socket and that
l2tp_dfs_seq_session_show() might call pppol2tp_show(), we also need to
call the session's .ref() callback to prevent the socket from going
away from under us.

Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Fixes: 0ad6614048cf ("l2tp: Add debugfs files for dumping l2tp debug info")
Fixes: 309795f4bec2 ("l2tp: Add netlink control API for L2TP")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c    |  8 ++++++--
 net/l2tp/l2tp_core.h    |  3 ++-
 net/l2tp/l2tp_debugfs.c | 10 +++++++---
 net/l2tp/l2tp_netlink.c |  7 +++++--
 net/l2tp/l2tp_ppp.c     | 10 +++++++---
 5 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index e927422d8c58..e37d9554da7b 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -327,7 +327,8 @@ struct l2tp_session *l2tp_session_get(struct net *net,
 }
 EXPORT_SYMBOL_GPL(l2tp_session_get);
 
-struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
+struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
+					  bool do_ref)
 {
 	int hash;
 	struct l2tp_session *session;
@@ -337,6 +338,9 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
 	for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
 		hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) {
 			if (++count > nth) {
+				l2tp_session_inc_refcount(session);
+				if (do_ref && session->ref)
+					session->ref(session);
 				read_unlock_bh(&tunnel->hlist_lock);
 				return session;
 			}
@@ -347,7 +351,7 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
 
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(l2tp_session_find_nth);
+EXPORT_SYMBOL_GPL(l2tp_session_get_nth);
 
 /* Lookup a session by interface name.
  * This is very inefficient but is only used by management interfaces.
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 3b9b704a84e4..8ce7818c7a9d 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -236,7 +236,8 @@ struct l2tp_session *l2tp_session_get(struct net *net,
 struct l2tp_session *l2tp_session_find(struct net *net,
 				       struct l2tp_tunnel *tunnel,
 				       u32 session_id);
-struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
+struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
+					  bool do_ref);
 struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname,
 						bool do_ref);
 struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 2d6760a2ae34..d100aed3d06f 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)
 
 static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
 {
-	pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
+	pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true);
 	pd->session_idx++;
 
 	if (pd->session == NULL) {
@@ -238,10 +238,14 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v)
 	}
 
 	/* Show the tunnel or session context */
-	if (pd->session == NULL)
+	if (!pd->session) {
 		l2tp_dfs_seq_tunnel_show(m, pd->tunnel);
-	else
+	} else {
 		l2tp_dfs_seq_session_show(m, pd->session);
+		if (pd->session->deref)
+			pd->session->deref(pd->session);
+		l2tp_session_dec_refcount(pd->session);
+	}
 
 out:
 	return 0;
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 93e317377c66..7e3e669baac4 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -867,7 +867,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
 				goto out;
 		}
 
-		session = l2tp_session_find_nth(tunnel, si);
+		session = l2tp_session_get_nth(tunnel, si, false);
 		if (session == NULL) {
 			ti++;
 			tunnel = NULL;
@@ -877,8 +877,11 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
 
 		if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid,
 					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					 session, L2TP_CMD_SESSION_GET) < 0)
+					 session, L2TP_CMD_SESSION_GET) < 0) {
+			l2tp_session_dec_refcount(session);
 			break;
+		}
+		l2tp_session_dec_refcount(session);
 
 		si++;
 	}
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 26501902d1a7..7bf73091baa2 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1560,7 +1560,7 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
 
 static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
 {
-	pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
+	pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true);
 	pd->session_idx++;
 
 	if (pd->session == NULL) {
@@ -1687,10 +1687,14 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v)
 
 	/* Show the tunnel or session context.
 	 */
-	if (pd->session == NULL)
+	if (!pd->session) {
 		pppol2tp_seq_tunnel_show(m, pd->tunnel);
-	else
+	} else {
 		pppol2tp_seq_session_show(m, pd->session);
+		if (pd->session->deref)
+			pd->session->deref(pd->session);
+		l2tp_session_dec_refcount(pd->session);
+	}
 
 out:
 	return 0;

From a8919661d78b90d747b3514197e49ecf8727d08c Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 3 Apr 2017 11:19:10 +0100
Subject: [PATCH 219/410] bnx2x: fix spelling mistake in macros
 HW_INTERRUT_ASSERT_SET_*

Trival fix, rename HW_INTERRUT_ASSERT_SET_* to HW_INTERRUPT_ASSERT_SET_*

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Yuval Mintz <Yuval.Mintz@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h    |  6 +++---
 .../net/ethernet/broadcom/bnx2x/bnx2x_main.c   | 18 +++++++++---------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 0a23034bbe3f..352beff796ae 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -2277,7 +2277,7 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id,
 				 GENERAL_ATTEN_OFFSET(LATCHED_ATTN_RBCP) | \
 				 GENERAL_ATTEN_OFFSET(LATCHED_ATTN_RSVD_GRC))
 
-#define HW_INTERRUT_ASSERT_SET_0 \
+#define HW_INTERRUPT_ASSERT_SET_0 \
 				(AEU_INPUTS_ATTN_BITS_TSDM_HW_INTERRUPT | \
 				 AEU_INPUTS_ATTN_BITS_TCM_HW_INTERRUPT | \
 				 AEU_INPUTS_ATTN_BITS_TSEMI_HW_INTERRUPT | \
@@ -2290,7 +2290,7 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id,
 				 AEU_INPUTS_ATTN_BITS_TSEMI_PARITY_ERROR |\
 				 AEU_INPUTS_ATTN_BITS_TCM_PARITY_ERROR |\
 				 AEU_INPUTS_ATTN_BITS_PBCLIENT_PARITY_ERROR)
-#define HW_INTERRUT_ASSERT_SET_1 \
+#define HW_INTERRUPT_ASSERT_SET_1 \
 				(AEU_INPUTS_ATTN_BITS_QM_HW_INTERRUPT | \
 				 AEU_INPUTS_ATTN_BITS_TIMERS_HW_INTERRUPT | \
 				 AEU_INPUTS_ATTN_BITS_XSDM_HW_INTERRUPT | \
@@ -2318,7 +2318,7 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id,
 				 AEU_INPUTS_ATTN_BITS_UPB_PARITY_ERROR | \
 				 AEU_INPUTS_ATTN_BITS_CSDM_PARITY_ERROR |\
 				 AEU_INPUTS_ATTN_BITS_CCM_PARITY_ERROR)
-#define HW_INTERRUT_ASSERT_SET_2 \
+#define HW_INTERRUPT_ASSERT_SET_2 \
 				(AEU_INPUTS_ATTN_BITS_CSEMI_HW_INTERRUPT | \
 				 AEU_INPUTS_ATTN_BITS_CDU_HW_INTERRUPT | \
 				 AEU_INPUTS_ATTN_BITS_DMAE_HW_INTERRUPT | \
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index ac76fc251d26..a851f95c307a 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -4166,14 +4166,14 @@ static void bnx2x_attn_int_deasserted0(struct bnx2x *bp, u32 attn)
 		bnx2x_release_phy_lock(bp);
 	}
 
-	if (attn & HW_INTERRUT_ASSERT_SET_0) {
+	if (attn & HW_INTERRUPT_ASSERT_SET_0) {
 
 		val = REG_RD(bp, reg_offset);
-		val &= ~(attn & HW_INTERRUT_ASSERT_SET_0);
+		val &= ~(attn & HW_INTERRUPT_ASSERT_SET_0);
 		REG_WR(bp, reg_offset, val);
 
 		BNX2X_ERR("FATAL HW block attention set0 0x%x\n",
-			  (u32)(attn & HW_INTERRUT_ASSERT_SET_0));
+			  (u32)(attn & HW_INTERRUPT_ASSERT_SET_0));
 		bnx2x_panic();
 	}
 }
@@ -4191,7 +4191,7 @@ static void bnx2x_attn_int_deasserted1(struct bnx2x *bp, u32 attn)
 			BNX2X_ERR("FATAL error from DORQ\n");
 	}
 
-	if (attn & HW_INTERRUT_ASSERT_SET_1) {
+	if (attn & HW_INTERRUPT_ASSERT_SET_1) {
 
 		int port = BP_PORT(bp);
 		int reg_offset;
@@ -4200,11 +4200,11 @@ static void bnx2x_attn_int_deasserted1(struct bnx2x *bp, u32 attn)
 				     MISC_REG_AEU_ENABLE1_FUNC_0_OUT_1);
 
 		val = REG_RD(bp, reg_offset);
-		val &= ~(attn & HW_INTERRUT_ASSERT_SET_1);
+		val &= ~(attn & HW_INTERRUPT_ASSERT_SET_1);
 		REG_WR(bp, reg_offset, val);
 
 		BNX2X_ERR("FATAL HW block attention set1 0x%x\n",
-			  (u32)(attn & HW_INTERRUT_ASSERT_SET_1));
+			  (u32)(attn & HW_INTERRUPT_ASSERT_SET_1));
 		bnx2x_panic();
 	}
 }
@@ -4235,7 +4235,7 @@ static void bnx2x_attn_int_deasserted2(struct bnx2x *bp, u32 attn)
 		}
 	}
 
-	if (attn & HW_INTERRUT_ASSERT_SET_2) {
+	if (attn & HW_INTERRUPT_ASSERT_SET_2) {
 
 		int port = BP_PORT(bp);
 		int reg_offset;
@@ -4244,11 +4244,11 @@ static void bnx2x_attn_int_deasserted2(struct bnx2x *bp, u32 attn)
 				     MISC_REG_AEU_ENABLE1_FUNC_0_OUT_2);
 
 		val = REG_RD(bp, reg_offset);
-		val &= ~(attn & HW_INTERRUT_ASSERT_SET_2);
+		val &= ~(attn & HW_INTERRUPT_ASSERT_SET_2);
 		REG_WR(bp, reg_offset, val);
 
 		BNX2X_ERR("FATAL HW block attention set2 0x%x\n",
-			  (u32)(attn & HW_INTERRUT_ASSERT_SET_2));
+			  (u32)(attn & HW_INTERRUPT_ASSERT_SET_2));
 		bnx2x_panic();
 	}
 }

From 249ee819e24c180909f43c1173c8ef6724d21faf Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Mon, 3 Apr 2017 13:23:15 +0200
Subject: [PATCH 220/410] l2tp: fix PPP pseudo-wire auto-loading

PPP pseudo-wire type is 7 (11 is L2TP_PWTYPE_IP).

Fixes: f1f39f911027 ("l2tp: auto load type modules")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ppp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 7bf73091baa2..861b255a2d51 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1853,4 +1853,4 @@ MODULE_DESCRIPTION("PPP over L2TP over UDP");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(PPPOL2TP_DRV_VERSION);
 MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OL2TP);
-MODULE_ALIAS_L2TP_PWTYPE(11);
+MODULE_ALIAS_L2TP_PWTYPE(7);

From 30c57f073449e09ae42f908bfff56c08c8751a6f Mon Sep 17 00:00:00 2001
From: Sekhar Nori <nsekhar@ti.com>
Date: Mon, 3 Apr 2017 17:34:28 +0530
Subject: [PATCH 221/410] net: ethernet: ti: cpsw: fix race condition during
 open()

TI's cpsw driver handles both OF and non-OF case for phy
connect. Unfortunately of_phy_connect() returns NULL on
error while phy_connect() returns ERR_PTR().

To handle this, cpsw_slave_open() overrides the return value
from phy_connect() to make it NULL or error.

This leaves a small window, where cpsw_adjust_link() may be
invoked for a slave while slave->phy pointer is temporarily
set to -ENODEV (or some other error) before it is finally set
to NULL.

_cpsw_adjust_link() only handles the NULL case, and an oops
results when ERR_PTR() is seen by it.

Note that cpsw_adjust_link() checks PHY status for each
slave whenever it is invoked. It can so happen that even
though phy_connect() for a given slave returns error,
_cpsw_adjust_link() is still called for that slave because
the link status of another slave changed.

Fix this by using a temporary pointer to store return value
of {of_}phy_connect() and do a one-time write to slave->phy.

Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reported-by: Yan Liu <yan-liu@ti.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpsw.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 58cdc066ef2c..fa674a8bda0c 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1267,6 +1267,7 @@ static void soft_reset_slave(struct cpsw_slave *slave)
 static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
 {
 	u32 slave_port;
+	struct phy_device *phy;
 	struct cpsw_common *cpsw = priv->cpsw;
 
 	soft_reset_slave(slave);
@@ -1300,27 +1301,28 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
 				   1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
 
 	if (slave->data->phy_node) {
-		slave->phy = of_phy_connect(priv->ndev, slave->data->phy_node,
+		phy = of_phy_connect(priv->ndev, slave->data->phy_node,
 				 &cpsw_adjust_link, 0, slave->data->phy_if);
-		if (!slave->phy) {
+		if (!phy) {
 			dev_err(priv->dev, "phy \"%s\" not found on slave %d\n",
 				slave->data->phy_node->full_name,
 				slave->slave_num);
 			return;
 		}
 	} else {
-		slave->phy = phy_connect(priv->ndev, slave->data->phy_id,
+		phy = phy_connect(priv->ndev, slave->data->phy_id,
 				 &cpsw_adjust_link, slave->data->phy_if);
-		if (IS_ERR(slave->phy)) {
+		if (IS_ERR(phy)) {
 			dev_err(priv->dev,
 				"phy \"%s\" not found on slave %d, err %ld\n",
 				slave->data->phy_id, slave->slave_num,
-				PTR_ERR(slave->phy));
-			slave->phy = NULL;
+				PTR_ERR(phy));
 			return;
 		}
 	}
 
+	slave->phy = phy;
+
 	phy_attached_info(slave->phy);
 
 	phy_start(slave->phy);

From fe514739d8538783749d3ce72f78e5a999ea5668 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 4 Apr 2017 15:08:36 -0700
Subject: [PATCH 222/410] libnvdimm: fix blk free space accounting

Commit a1f3e4d6a0c3 "libnvdimm, region: update nd_region_available_dpa()
for multi-pmem support" reworked blk dpa (DIMM Physical Address)
accounting to comprehend multiple pmem namespace allocations aliasing
with a given blk-dpa range.

The following call trace is a result of failing to account for allocated
blk capacity.

 WARNING: CPU: 1 PID: 2433 at tools/testing/nvdimm/../../../drivers/nvdimm/names
4 size_store+0x6f3/0x930 [libnvdimm]
 nd_region region5: allocation underrun: 0x0 of 0x1000000 bytes
 [..]
 Call Trace:
  dump_stack+0x86/0xc3
  __warn+0xcb/0xf0
  warn_slowpath_fmt+0x5f/0x80
  size_store+0x6f3/0x930 [libnvdimm]
  dev_attr_store+0x18/0x30

If a given blk-dpa allocation does not alias with any pmem ranges then
the full allocation should be accounted as busy space, not the size of
the current pmem contribution to the region.

The thinkos that led to this confusion was not realizing that the struct
resource management is already guaranteeing no collisions between pmem
allocations and blk allocations on the same dimm. Also, we do not try to
support blk allocations in aliased pmem holes.

This patch also fixes a case where the available blk goes negative.

Cc: <stable@vger.kernel.org>
Fixes: a1f3e4d6a0c3 ("libnvdimm, region: update nd_region_available_dpa() for multi-pmem support").
Reported-by: Dariusz Dokupil <dariusz.dokupil@intel.com>
Reported-by: Dave Jiang <dave.jiang@intel.com>
Reported-by: Vishal Verma <vishal.l.verma@intel.com>
Tested-by: Dave Jiang <dave.jiang@intel.com>
Tested-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/dimm_devs.c | 77 ++++++--------------------------------
 1 file changed, 11 insertions(+), 66 deletions(-)

diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 0eedc49e0d47..8b721321be5b 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -395,7 +395,7 @@ EXPORT_SYMBOL_GPL(nvdimm_create);
 
 int alias_dpa_busy(struct device *dev, void *data)
 {
-	resource_size_t map_end, blk_start, new, busy;
+	resource_size_t map_end, blk_start, new;
 	struct blk_alloc_info *info = data;
 	struct nd_mapping *nd_mapping;
 	struct nd_region *nd_region;
@@ -436,29 +436,19 @@ int alias_dpa_busy(struct device *dev, void *data)
  retry:
 	/*
 	 * Find the free dpa from the end of the last pmem allocation to
-	 * the end of the interleave-set mapping that is not already
-	 * covered by a blk allocation.
+	 * the end of the interleave-set mapping.
 	 */
-	busy = 0;
 	for_each_dpa_resource(ndd, res) {
+		if (strncmp(res->name, "pmem", 4) != 0)
+			continue;
 		if ((res->start >= blk_start && res->start < map_end)
 				|| (res->end >= blk_start
 					&& res->end <= map_end)) {
-			if (strncmp(res->name, "pmem", 4) == 0) {
-				new = max(blk_start, min(map_end + 1,
-							res->end + 1));
-				if (new != blk_start) {
-					blk_start = new;
-					goto retry;
-				}
-			} else
-				busy += min(map_end, res->end)
-					- max(nd_mapping->start, res->start) + 1;
-		} else if (nd_mapping->start > res->start
-				&& map_end < res->end) {
-			/* total eclipse of the PMEM region mapping */
-			busy += nd_mapping->size;
-			break;
+			new = max(blk_start, min(map_end + 1, res->end + 1));
+			if (new != blk_start) {
+				blk_start = new;
+				goto retry;
+			}
 		}
 	}
 
@@ -470,52 +460,11 @@ int alias_dpa_busy(struct device *dev, void *data)
 		return 1;
 	}
 
-	info->available -= blk_start - nd_mapping->start + busy;
+	info->available -= blk_start - nd_mapping->start;
 
 	return 0;
 }
 
-static int blk_dpa_busy(struct device *dev, void *data)
-{
-	struct blk_alloc_info *info = data;
-	struct nd_mapping *nd_mapping;
-	struct nd_region *nd_region;
-	resource_size_t map_end;
-	int i;
-
-	if (!is_nd_pmem(dev))
-		return 0;
-
-	nd_region = to_nd_region(dev);
-	for (i = 0; i < nd_region->ndr_mappings; i++) {
-		nd_mapping  = &nd_region->mapping[i];
-		if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
-			break;
-	}
-
-	if (i >= nd_region->ndr_mappings)
-		return 0;
-
-	map_end = nd_mapping->start + nd_mapping->size - 1;
-	if (info->res->start >= nd_mapping->start
-			&& info->res->start < map_end) {
-		if (info->res->end <= map_end) {
-			info->busy = 0;
-			return 1;
-		} else {
-			info->busy -= info->res->end - map_end;
-			return 0;
-		}
-	} else if (info->res->end >= nd_mapping->start
-			&& info->res->end <= map_end) {
-		info->busy -= nd_mapping->start - info->res->start;
-		return 0;
-	} else {
-		info->busy -= nd_mapping->size;
-		return 0;
-	}
-}
-
 /**
  * nd_blk_available_dpa - account the unused dpa of BLK region
  * @nd_mapping: container of dpa-resource-root + labels
@@ -545,11 +494,7 @@ resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
 	for_each_dpa_resource(ndd, res) {
 		if (strncmp(res->name, "blk", 3) != 0)
 			continue;
-
-		info.res = res;
-		info.busy = resource_size(res);
-		device_for_each_child(&nvdimm_bus->dev, &info, blk_dpa_busy);
-		info.available -= info.busy;
+		info.available -= resource_size(res);
 	}
 
 	return info.available;

From 540f4c0e894f7e46a66dfa424b16424cbdc12c38 Mon Sep 17 00:00:00 2001
From: "Cohen, Eugene" <eugene@hp.com>
Date: Tue, 4 Apr 2017 16:27:43 +0100
Subject: [PATCH 223/410] efi/libstub: Skip GOP with PIXEL_BLT_ONLY format

The UEFI Specification permits Graphics Output Protocol (GOP) instances
without direct framebuffer access. This is indicated in the Mode structure
with a PixelFormat enumeration value of PIXEL_BLT_ONLY. Given that the
kernel does not know how to drive a Blt() only framebuffer (which is only
permitted before ExitBootServices() anyway), we should disregard such
framebuffers when looking for a GOP instance that is suitable for use as
the boot console.

So modify the EFI GOP initialization to not use a PIXEL_BLT_ONLY instance,
preventing attempts later in boot to use an invalid screen_info.lfb_base
address.

Signed-off-by: Eugene Cohen <eugene@hp.com>
[ Moved the Blt() only check into the loop and clarified that Blt() only GOPs are unusable by the kernel. ]
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: <stable@vger.kernel.org> # v4.7+
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: leif.lindholm@linaro.org
Cc: linux-efi@vger.kernel.org
Cc: lorenzo.pieralisi@arm.com
Fixes: 9822504c1fa5 ("efifb: Enable the efi-framebuffer platform driver ...")
Link: http://lkml.kernel.org/r/20170404152744.26687-2-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/libstub/gop.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c
index 932742e4cf23..24c461dea7af 100644
--- a/drivers/firmware/efi/libstub/gop.c
+++ b/drivers/firmware/efi/libstub/gop.c
@@ -149,7 +149,8 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si,
 
 		status = __gop_query32(sys_table_arg, gop32, &info, &size,
 				       &current_fb_base);
-		if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
+		if (status == EFI_SUCCESS && (!first_gop || conout_found) &&
+		    info->pixel_format != PIXEL_BLT_ONLY) {
 			/*
 			 * Systems that use the UEFI Console Splitter may
 			 * provide multiple GOP devices, not all of which are
@@ -266,7 +267,8 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si,
 
 		status = __gop_query64(sys_table_arg, gop64, &info, &size,
 				       &current_fb_base);
-		if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
+		if (status == EFI_SUCCESS && (!first_gop || conout_found) &&
+		    info->pixel_format != PIXEL_BLT_ONLY) {
 			/*
 			 * Systems that use the UEFI Console Splitter may
 			 * provide multiple GOP devices, not all of which are

From cfac6dfa42bddfa9711b20d486e521d1a41ab09f Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Tue, 4 Apr 2017 18:15:01 +0200
Subject: [PATCH 224/410] x86/signals: Fix lower/upper bound reporting in
 compat siginfo

Put the right values from the original siginfo into the
userspace compat-siginfo.

This fixes the 32-bit MPX "tabletest" testcase on 64-bit kernels.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: <stable@vger.kernel.org> # v4.8+
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Dmitry Safonov <0x7f454c46@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: a4455082dc6f0 ('x86/signals: Add missing signal_compat code for x86 features')
Link: http://lkml.kernel.org/r/1491322501-5054-1-git-send-email-joro@8bytes.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/signal_compat.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index ec1f756f9dc9..71beb28600d4 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -151,8 +151,8 @@ int __copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from,
 
 				if (from->si_signo == SIGSEGV) {
 					if (from->si_code == SEGV_BNDERR) {
-						compat_uptr_t lower = (unsigned long)&to->si_lower;
-						compat_uptr_t upper = (unsigned long)&to->si_upper;
+						compat_uptr_t lower = (unsigned long)from->si_lower;
+						compat_uptr_t upper = (unsigned long)from->si_upper;
 						put_user_ex(lower, &to->si_lower);
 						put_user_ex(upper, &to->si_upper);
 					}

From 55d728a40d368ba80443be85c02e641fc9082a3f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 4 Apr 2017 16:27:44 +0100
Subject: [PATCH 225/410] efi/fb: Avoid reconfiguration of BAR that covers the
 framebuffer

On UEFI systems, the PCI subsystem is enumerated by the firmware,
and if a graphical framebuffer is exposed via a PCI device, its base
address and size are exposed to the OS via the Graphics Output
Protocol (GOP).

On arm64 PCI systems, the entire PCI hierarchy is reconfigured from
scratch at boot. This may result in the GOP framebuffer address to
become stale, if the BAR covering the framebuffer is modified. This
will cause the framebuffer to become unresponsive, and may in some
cases result in unpredictable behavior if the range is reassigned to
another device.

So add a non-x86 quirk to the EFI fb driver to find the BAR associated
with the GOP base address, and claim the BAR resource so that the PCI
core will not move it.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: <stable@vger.kernel.org> # v4.7+
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Jones <pjones@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: leif.lindholm@linaro.org
Cc: linux-efi@vger.kernel.org
Cc: lorenzo.pieralisi@arm.com
Fixes: 9822504c1fa5 ("efifb: Enable the efi-framebuffer platform driver ...")
Link: http://lkml.kernel.org/r/20170404152744.26687-3-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/video/fbdev/efifb.c | 66 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/efifb.c b/drivers/video/fbdev/efifb.c
index 8c4dc1e1f94f..b827a8113e26 100644
--- a/drivers/video/fbdev/efifb.c
+++ b/drivers/video/fbdev/efifb.c
@@ -10,6 +10,7 @@
 #include <linux/efi.h>
 #include <linux/errno.h>
 #include <linux/fb.h>
+#include <linux/pci.h>
 #include <linux/platform_device.h>
 #include <linux/screen_info.h>
 #include <video/vga.h>
@@ -143,6 +144,8 @@ static struct attribute *efifb_attrs[] = {
 };
 ATTRIBUTE_GROUPS(efifb);
 
+static bool pci_dev_disabled;	/* FB base matches BAR of a disabled device */
+
 static int efifb_probe(struct platform_device *dev)
 {
 	struct fb_info *info;
@@ -152,7 +155,7 @@ static int efifb_probe(struct platform_device *dev)
 	unsigned int size_total;
 	char *option = NULL;
 
-	if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI)
+	if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI || pci_dev_disabled)
 		return -ENODEV;
 
 	if (fb_get_options("efifb", &option))
@@ -360,3 +363,64 @@ static struct platform_driver efifb_driver = {
 };
 
 builtin_platform_driver(efifb_driver);
+
+#if defined(CONFIG_PCI) && !defined(CONFIG_X86)
+
+static bool pci_bar_found;	/* did we find a BAR matching the efifb base? */
+
+static void claim_efifb_bar(struct pci_dev *dev, int idx)
+{
+	u16 word;
+
+	pci_bar_found = true;
+
+	pci_read_config_word(dev, PCI_COMMAND, &word);
+	if (!(word & PCI_COMMAND_MEMORY)) {
+		pci_dev_disabled = true;
+		dev_err(&dev->dev,
+			"BAR %d: assigned to efifb but device is disabled!\n",
+			idx);
+		return;
+	}
+
+	if (pci_claim_resource(dev, idx)) {
+		pci_dev_disabled = true;
+		dev_err(&dev->dev,
+			"BAR %d: failed to claim resource for efifb!\n", idx);
+		return;
+	}
+
+	dev_info(&dev->dev, "BAR %d: assigned to efifb\n", idx);
+}
+
+static void efifb_fixup_resources(struct pci_dev *dev)
+{
+	u64 base = screen_info.lfb_base;
+	u64 size = screen_info.lfb_size;
+	int i;
+
+	if (pci_bar_found || screen_info.orig_video_isVGA != VIDEO_TYPE_EFI)
+		return;
+
+	if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE)
+		base |= (u64)screen_info.ext_lfb_base << 32;
+
+	if (!base)
+		return;
+
+	for (i = 0; i < PCI_STD_RESOURCE_END; i++) {
+		struct resource *res = &dev->resource[i];
+
+		if (!(res->flags & IORESOURCE_MEM))
+			continue;
+
+		if (res->start <= base && res->end >= base + size - 1) {
+			claim_efifb_bar(dev, i);
+			break;
+		}
+	}
+}
+DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID, PCI_BASE_CLASS_DISPLAY,
+			       16, efifb_fixup_resources);
+
+#endif

From 8f5f525d5b83f7d76a6baf9c4e94d4bf312ea7f6 Mon Sep 17 00:00:00 2001
From: Oliver O'Halloran <oohall@gmail.com>
Date: Mon, 3 Apr 2017 13:25:12 +1000
Subject: [PATCH 226/410] powerpc/64: Fix flush_(d|i)cache_range() called from
 modules

When the kernel is compiled to use 64bit ABIv2 the _GLOBAL() macro does
not include a global entry point. A function's global entry point is
used when the function is called from a different TOC context and in the
kernel this typically means a call from a module into the vmlinux (or
vice-versa).

There are a few exported asm functions declared with _GLOBAL() and
calling them from a module will likely crash the kernel since any TOC
relative load will yield garbage.

flush_icache_range() and flush_dcache_range() are both exported to
modules, and use the TOC, so must use _GLOBAL_TOC().

Fixes: 721aeaa9fdf3 ("powerpc: Build little endian ppc64 kernel with ABIv2")
Cc: stable@vger.kernel.org # v3.16+
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/misc_64.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index ae179cb1bb3c..c119044cad0d 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -67,7 +67,7 @@ PPC64_CACHES:
  *   flush all bytes from start through stop-1 inclusive
  */
 
-_GLOBAL(flush_icache_range)
+_GLOBAL_TOC(flush_icache_range)
 BEGIN_FTR_SECTION
 	PURGE_PREFETCHED_INS
 	blr
@@ -120,7 +120,7 @@ EXPORT_SYMBOL(flush_icache_range)
  *
  *    flush all bytes from start to stop-1 inclusive
  */
-_GLOBAL(flush_dcache_range)
+_GLOBAL_TOC(flush_dcache_range)
 
 /*
  * Flush the data cache to memory 

From 88b1bf7268f56887ca88eb09c6fb0f4fc970121a Mon Sep 17 00:00:00 2001
From: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Date: Wed, 29 Mar 2017 19:19:42 +0200
Subject: [PATCH 227/410] powerpc/mm: Add missing global TLB invalidate if cxl
 is active

Commit 4c6d9acce1f4 ("powerpc/mm: Add hooks for cxl") converted local
TLB invalidates to global if the cxl driver is active. This is necessary
because the CAPP snoops invalidations to forward them to the PSL on the
cxl adapter. However one path was forgotten. native_flush_hash_range()
still does local TLB invalidates, as found out the hard way recently.

This patch fixes it by following the same logic as previously: if the
cxl driver is active, the local TLB invalidates are 'upgraded' to
global.

Fixes: 4c6d9acce1f4 ("powerpc/mm: Add hooks for cxl")
Cc: stable@vger.kernel.org # v3.18+
Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/hash_native_64.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index cc332608e656..65bb8f33b399 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -638,6 +638,10 @@ static void native_flush_hash_range(unsigned long number, int local)
 	unsigned long psize = batch->psize;
 	int ssize = batch->ssize;
 	int i;
+	unsigned int use_local;
+
+	use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) &&
+		mmu_psize_defs[psize].tlbiel && !cxl_ctx_in_use();
 
 	local_irq_save(flags);
 
@@ -667,8 +671,7 @@ static void native_flush_hash_range(unsigned long number, int local)
 		} pte_iterate_hashed_end();
 	}
 
-	if (mmu_has_feature(MMU_FTR_TLBIEL) &&
-	    mmu_psize_defs[psize].tlbiel && local) {
+	if (use_local) {
 		asm volatile("ptesync":::"memory");
 		for (i = 0; i < number; i++) {
 			vpn = batch->vpn[i];

From b2376407f98920c9b0c411948675f58a9640be35 Mon Sep 17 00:00:00 2001
From: Vic Yang <victoryang@google.com>
Date: Fri, 24 Mar 2017 18:44:01 +0100
Subject: [PATCH 228/410] mfd: cros-ec: Fix host command buffer size

For SPI, we can get up to 32 additional bytes for response preamble.
The current overhead (2 bytes) may cause problems when we try to receive
a big response. Update it to 32 bytes.

Without this fix we could see a kernel BUG when we receive a big response
from the Chrome EC when is connected via SPI.

Signed-off-by: Vic Yang <victoryang@google.com>
Tested-by: Enric Balletbo i Serra <enric.balletbo.collabora.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/cros_ec.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 7a01c94496f1..3eef9fb9968a 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -35,10 +35,11 @@
  * Max bus-specific overhead incurred by request/responses.
  * I2C requires 1 additional byte for requests.
  * I2C requires 2 additional bytes for responses.
+ * SPI requires up to 32 additional bytes for responses.
  * */
 #define EC_PROTO_VERSION_UNKNOWN	0
 #define EC_MAX_REQUEST_OVERHEAD		1
-#define EC_MAX_RESPONSE_OVERHEAD	2
+#define EC_MAX_RESPONSE_OVERHEAD	32
 
 /*
  * Command interface between EC and AP, for LPC, I2C and SPI interfaces.

From ec360607a25fae97c81eef2f02268ae8ed3649b4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Mon, 3 Apr 2017 18:12:04 +0300
Subject: [PATCH 229/410] crypto: caam - fix JR platform device subsequent
 (re)creations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The way Job Ring platform devices are created and released does not
allow for multiple create-release cycles.

JR0 Platform device creation error
JR0 Platform device creation error
caam 2100000.caam: no queues configured, terminating
caam: probe of 2100000.caam failed with error -12

The reason is that platform devices are created for each job ring:

        for_each_available_child_of_node(nprop, np)
                if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") ||
                    of_device_is_compatible(np, "fsl,sec4.0-job-ring")) {
                        ctrlpriv->jrpdev[ring] =
                                of_platform_device_create(np, NULL, dev);

which sets OF_POPULATED on the device node, but then it cleans these up:

        /* Remove platform devices for JobRs */
        for (ring = 0; ring < ctrlpriv->total_jobrs; ring++) {
                if (ctrlpriv->jrpdev[ring])
                        of_device_unregister(ctrlpriv->jrpdev[ring]);
        }

which leaves OF_POPULATED set.

Use of_platform_populate / of_platform_depopulate instead.
This allows for a bit of driver clean-up, jrpdev is no longer needed.

Logic changes a bit too:
-exit in case of_platform_populate fails, since currently even QI backend
depends on JR; true, we no longer support the case when "some" of the JR
DT nodes are incorrect
-when cleaning up, caam_remove() would also depopulate RTIC in case
it would have been populated somewhere else - not the case for now

Cc: <stable@vger.kernel.org>
Fixes: 313ea293e9c4d ("crypto: caam - Add Platform driver for Job Ring")
Reported-by: Russell King <rmk+kernel@armlinux.org.uk>
Suggested-by: Rob Herring <robh+dt@kernel.org>
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/ctrl.c   | 63 +++++++++++-------------------------
 drivers/crypto/caam/intern.h |  1 -
 2 files changed, 19 insertions(+), 45 deletions(-)

diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index fef39f9f41ee..220f94bd1635 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -301,15 +301,13 @@ static int caam_remove(struct platform_device *pdev)
 	struct device *ctrldev;
 	struct caam_drv_private *ctrlpriv;
 	struct caam_ctrl __iomem *ctrl;
-	int ring;
 
 	ctrldev = &pdev->dev;
 	ctrlpriv = dev_get_drvdata(ctrldev);
 	ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl;
 
-	/* Remove platform devices for JobRs */
-	for (ring = 0; ring < ctrlpriv->total_jobrs; ring++)
-		of_device_unregister(ctrlpriv->jrpdev[ring]);
+	/* Remove platform devices under the crypto node */
+	of_platform_depopulate(ctrldev);
 
 	/* De-initialize RNG state handles initialized by this driver. */
 	if (ctrlpriv->rng4_sh_init)
@@ -418,10 +416,21 @@ DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u32_ro, caam_debugfs_u32_get, NULL, "%llu\n");
 DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u64_ro, caam_debugfs_u64_get, NULL, "%llu\n");
 #endif
 
+static const struct of_device_id caam_match[] = {
+	{
+		.compatible = "fsl,sec-v4.0",
+	},
+	{
+		.compatible = "fsl,sec4.0",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, caam_match);
+
 /* Probe routine for CAAM top (controller) level */
 static int caam_probe(struct platform_device *pdev)
 {
-	int ret, ring, ridx, rspec, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN;
+	int ret, ring, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN;
 	u64 caam_id;
 	struct device *dev;
 	struct device_node *nprop, *np;
@@ -597,47 +606,24 @@ static int caam_probe(struct platform_device *pdev)
 		goto iounmap_ctrl;
 	}
 
-	/*
-	 * Detect and enable JobRs
-	 * First, find out how many ring spec'ed, allocate references
-	 * for all, then go probe each one.
-	 */
-	rspec = 0;
-	for_each_available_child_of_node(nprop, np)
-		if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") ||
-		    of_device_is_compatible(np, "fsl,sec4.0-job-ring"))
-			rspec++;
-
-	ctrlpriv->jrpdev = devm_kcalloc(&pdev->dev, rspec,
-					sizeof(*ctrlpriv->jrpdev), GFP_KERNEL);
-	if (ctrlpriv->jrpdev == NULL) {
-		ret = -ENOMEM;
+	ret = of_platform_populate(nprop, caam_match, NULL, dev);
+	if (ret) {
+		dev_err(dev, "JR platform devices creation error\n");
 		goto iounmap_ctrl;
 	}
 
 	ring = 0;
-	ridx = 0;
-	ctrlpriv->total_jobrs = 0;
 	for_each_available_child_of_node(nprop, np)
 		if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") ||
 		    of_device_is_compatible(np, "fsl,sec4.0-job-ring")) {
-			ctrlpriv->jrpdev[ring] =
-				of_platform_device_create(np, NULL, dev);
-			if (!ctrlpriv->jrpdev[ring]) {
-				pr_warn("JR physical index %d: Platform device creation error\n",
-					ridx);
-				ridx++;
-				continue;
-			}
 			ctrlpriv->jr[ring] = (struct caam_job_ring __iomem __force *)
 					     ((__force uint8_t *)ctrl +
-					     (ridx + JR_BLOCK_NUMBER) *
+					     (ring + JR_BLOCK_NUMBER) *
 					      BLOCK_OFFSET
 					     );
 			ctrlpriv->total_jobrs++;
 			ring++;
-			ridx++;
-	}
+		}
 
 	/* Check to see if QI present. If so, enable */
 	ctrlpriv->qi_present =
@@ -847,17 +833,6 @@ disable_caam_ipg:
 	return ret;
 }
 
-static struct of_device_id caam_match[] = {
-	{
-		.compatible = "fsl,sec-v4.0",
-	},
-	{
-		.compatible = "fsl,sec4.0",
-	},
-	{},
-};
-MODULE_DEVICE_TABLE(of, caam_match);
-
 static struct platform_driver caam_driver = {
 	.driver = {
 		.name = "caam",
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
index e2bcacc1a921..dbed8baeebe5 100644
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -66,7 +66,6 @@ struct caam_drv_private_jr {
 struct caam_drv_private {
 
 	struct device *dev;
-	struct platform_device **jrpdev; /* Alloc'ed array per sub-device */
 	struct platform_device *pdev;
 
 	/* Physical-presence section */

From 33fa46d7b310e06d2cb2ab5417c100af120bfb65 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Mon, 3 Apr 2017 18:30:07 +0300
Subject: [PATCH 230/410] crypto: caam - fix invalid dereference in
 caam_rsa_init_tfm()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In case caam_jr_alloc() fails, ctx->dev carries the error code,
thus accessing it with dev_err() is incorrect.

Cc: <stable@vger.kernel.org> # 4.8+
Fixes: 8c419778ab57e ("crypto: caam - add support for RSA algorithm")
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caampkc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c
index 32100c4851dd..49cbdcba7883 100644
--- a/drivers/crypto/caam/caampkc.c
+++ b/drivers/crypto/caam/caampkc.c
@@ -506,7 +506,7 @@ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm)
 	ctx->dev = caam_jr_alloc();
 
 	if (IS_ERR(ctx->dev)) {
-		dev_err(ctx->dev, "Job Ring Device allocation for transform failed\n");
+		pr_err("Job Ring Device allocation for transform failed\n");
 		return PTR_ERR(ctx->dev);
 	}
 

From 40c98cb57cdbc377456116ad4582c89e329721b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Wed, 5 Apr 2017 11:41:03 +0300
Subject: [PATCH 231/410] crypto: caam - fix RNG deinstantiation error checking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

RNG instantiation was previously fixed by
commit 62743a4145bb9 ("crypto: caam - fix RNG init descriptor ret. code checking")
while deinstantiation was not addressed.

Since the descriptors used are similar, in the sense that they both end
with a JUMP HALT command, checking for errors should be similar too,
i.e. status code 7000_0000h should be considered successful.

Cc: <stable@vger.kernel.org> # 3.13+
Fixes: 1005bccd7a4a6 ("crypto: caam - enable instantiation of all RNG4 state handles")
Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/ctrl.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 220f94bd1635..5d7f73d60515 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -281,7 +281,8 @@ static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask)
 			/* Try to run it through DECO0 */
 			ret = run_descriptor_deco0(ctrldev, desc, &status);
 
-			if (ret || status) {
+			if (ret ||
+			    (status && status != JRSTA_SSRC_JUMP_HALT_CC)) {
 				dev_err(ctrldev,
 					"Failed to deinstantiate RNG4 SH%d\n",
 					sh_idx);

From 62277de758b155dc04b78f195a1cb5208c37b2df Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Date: Fri, 17 Jun 2016 17:33:59 +0000
Subject: [PATCH 232/410] ring-buffer: Fix return value check in
 test_ringbuffer()

In case of error, the function kthread_run() returns ERR_PTR()
and never returns NULL. The NULL test in the return value check
should be replaced with IS_ERR().

Link: http://lkml.kernel.org/r/1466184839-14927-1-git-send-email-weiyj_lk@163.com

Cc: stable@vger.kernel.org
Fixes: 6c43e554a ("ring-buffer: Add ring buffer startup selftest")
Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 96fc3c043ad6..54e7a90db848 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4826,9 +4826,9 @@ static __init int test_ringbuffer(void)
 		rb_data[cpu].cnt = cpu;
 		rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
 						 "rbtester/%d", cpu);
-		if (WARN_ON(!rb_threads[cpu])) {
+		if (WARN_ON(IS_ERR(rb_threads[cpu]))) {
 			pr_cont("FAILED\n");
-			ret = -1;
+			ret = PTR_ERR(rb_threads[cpu]);
 			goto out_free;
 		}
 
@@ -4838,9 +4838,9 @@ static __init int test_ringbuffer(void)
 
 	/* Now create the rb hammer! */
 	rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer");
-	if (WARN_ON(!rb_hammer)) {
+	if (WARN_ON(IS_ERR(rb_hammer))) {
 		pr_cont("FAILED\n");
-		ret = -1;
+		ret = PTR_ERR(rb_hammer);
 		goto out_free;
 	}
 

From ef62a2d81f73d9cddef14bc3d9097a57010d551c Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Fri, 31 Mar 2017 10:37:44 +0100
Subject: [PATCH 233/410] metag/usercopy: Drop unused macros

Metag's lib/usercopy.c has a bunch of copy_from_user macros for larger
copies between 5 and 16 bytes which are completely unused. Before fixing
zeroing lets drop these macros so there is less to fix.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-metag@vger.kernel.org
Cc: stable@vger.kernel.org
---
 arch/metag/lib/usercopy.c | 113 --------------------------------------
 1 file changed, 113 deletions(-)

diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c
index b3ebfe9c8e88..b4eb1f17069f 100644
--- a/arch/metag/lib/usercopy.c
+++ b/arch/metag/lib/usercopy.c
@@ -651,119 +651,6 @@ EXPORT_SYMBOL(__copy_user);
 #define __asm_copy_from_user_4(to, from, ret) \
 	__asm_copy_from_user_4x_cont(to, from, ret, "", "", "")
 
-#define __asm_copy_from_user_5(to, from, ret) \
-	__asm_copy_from_user_4x_cont(to, from, ret,	\
-		"	GETB D1Ar1,[%1++]\n"		\
-		"4:	SETB [%0++],D1Ar1\n",		\
-		"5:	ADD  %2,%2,#1\n"		\
-		"	SETB [%0++],D1Ar1\n",		\
-		"	.long 4b,5b\n")
-
-#define __asm_copy_from_user_6x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
-	__asm_copy_from_user_4x_cont(to, from, ret,	\
-		"	GETW D1Ar1,[%1++]\n"		\
-		"4:	SETW [%0++],D1Ar1\n" COPY,	\
-		"5:	ADD  %2,%2,#2\n"		\
-		"	SETW [%0++],D1Ar1\n" FIXUP,	\
-		"	.long 4b,5b\n" TENTRY)
-
-#define __asm_copy_from_user_6(to, from, ret) \
-	__asm_copy_from_user_6x_cont(to, from, ret, "", "", "")
-
-#define __asm_copy_from_user_7(to, from, ret) \
-	__asm_copy_from_user_6x_cont(to, from, ret,	\
-		"	GETB D1Ar1,[%1++]\n"		\
-		"6:	SETB [%0++],D1Ar1\n",		\
-		"7:	ADD  %2,%2,#1\n"		\
-		"	SETB [%0++],D1Ar1\n",		\
-		"	.long 6b,7b\n")
-
-#define __asm_copy_from_user_8x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
-	__asm_copy_from_user_4x_cont(to, from, ret,	\
-		"	GETD D1Ar1,[%1++]\n"		\
-		"4:	SETD [%0++],D1Ar1\n" COPY,	\
-		"5:	ADD  %2,%2,#4\n"			\
-		"	SETD [%0++],D1Ar1\n" FIXUP,		\
-		"	.long 4b,5b\n" TENTRY)
-
-#define __asm_copy_from_user_8(to, from, ret) \
-	__asm_copy_from_user_8x_cont(to, from, ret, "", "", "")
-
-#define __asm_copy_from_user_9(to, from, ret) \
-	__asm_copy_from_user_8x_cont(to, from, ret,	\
-		"	GETB D1Ar1,[%1++]\n"		\
-		"6:	SETB [%0++],D1Ar1\n",		\
-		"7:	ADD  %2,%2,#1\n"		\
-		"	SETB [%0++],D1Ar1\n",		\
-		"	.long 6b,7b\n")
-
-#define __asm_copy_from_user_10x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
-	__asm_copy_from_user_8x_cont(to, from, ret,	\
-		"	GETW D1Ar1,[%1++]\n"		\
-		"6:	SETW [%0++],D1Ar1\n" COPY,	\
-		"7:	ADD  %2,%2,#2\n"		\
-		"	SETW [%0++],D1Ar1\n" FIXUP,	\
-		"	.long 6b,7b\n" TENTRY)
-
-#define __asm_copy_from_user_10(to, from, ret) \
-	__asm_copy_from_user_10x_cont(to, from, ret, "", "", "")
-
-#define __asm_copy_from_user_11(to, from, ret)		\
-	__asm_copy_from_user_10x_cont(to, from, ret,	\
-		"	GETB D1Ar1,[%1++]\n"		\
-		"8:	SETB [%0++],D1Ar1\n",		\
-		"9:	ADD  %2,%2,#1\n"		\
-		"	SETB [%0++],D1Ar1\n",		\
-		"	.long 8b,9b\n")
-
-#define __asm_copy_from_user_12x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
-	__asm_copy_from_user_8x_cont(to, from, ret,	\
-		"	GETD D1Ar1,[%1++]\n"		\
-		"6:	SETD [%0++],D1Ar1\n" COPY,	\
-		"7:	ADD  %2,%2,#4\n"		\
-		"	SETD [%0++],D1Ar1\n" FIXUP,	\
-		"	.long 6b,7b\n" TENTRY)
-
-#define __asm_copy_from_user_12(to, from, ret) \
-	__asm_copy_from_user_12x_cont(to, from, ret, "", "", "")
-
-#define __asm_copy_from_user_13(to, from, ret) \
-	__asm_copy_from_user_12x_cont(to, from, ret,	\
-		"	GETB D1Ar1,[%1++]\n"		\
-		"8:	SETB [%0++],D1Ar1\n",		\
-		"9:	ADD  %2,%2,#1\n"		\
-		"	SETB [%0++],D1Ar1\n",		\
-		"	.long 8b,9b\n")
-
-#define __asm_copy_from_user_14x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
-	__asm_copy_from_user_12x_cont(to, from, ret,	\
-		"	GETW D1Ar1,[%1++]\n"		\
-		"8:	SETW [%0++],D1Ar1\n" COPY,	\
-		"9:	ADD  %2,%2,#2\n"		\
-		"	SETW [%0++],D1Ar1\n" FIXUP,	\
-		"	.long 8b,9b\n" TENTRY)
-
-#define __asm_copy_from_user_14(to, from, ret) \
-	__asm_copy_from_user_14x_cont(to, from, ret, "", "", "")
-
-#define __asm_copy_from_user_15(to, from, ret) \
-	__asm_copy_from_user_14x_cont(to, from, ret,	\
-		"	GETB D1Ar1,[%1++]\n"		\
-		"10:	SETB [%0++],D1Ar1\n",		\
-		"11:	ADD  %2,%2,#1\n"		\
-		"	SETB [%0++],D1Ar1\n",		\
-		"	.long 10b,11b\n")
-
-#define __asm_copy_from_user_16x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
-	__asm_copy_from_user_12x_cont(to, from, ret,	\
-		"	GETD D1Ar1,[%1++]\n"		\
-		"8:	SETD [%0++],D1Ar1\n" COPY,	\
-		"9:	ADD  %2,%2,#4\n"		\
-		"	SETD [%0++],D1Ar1\n" FIXUP,	\
-		"	.long 8b,9b\n" TENTRY)
-
-#define __asm_copy_from_user_16(to, from, ret) \
-	__asm_copy_from_user_16x_cont(to, from, ret, "", "", "")
 
 #define __asm_copy_from_user_8x64(to, from, ret) \
 	asm volatile (				\

From 2257211942bbbf6c798ab70b487d7e62f7835a1a Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Fri, 31 Mar 2017 11:23:18 +0100
Subject: [PATCH 234/410] metag/usercopy: Fix alignment error checking

Fix the error checking of the alignment adjustment code in
raw_copy_from_user(), which mistakenly considers it safe to skip the
error check when aligning the source buffer on a 2 or 4 byte boundary.

If the destination buffer was unaligned it may have started to copy
using byte or word accesses, which could well be at the start of a new
(valid) source page. This would result in it appearing to have copied 1
or 2 bytes at the end of the first (invalid) page rather than none at
all.

Fixes: 373cd784d0fc ("metag: Memory handling")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-metag@vger.kernel.org
Cc: stable@vger.kernel.org
---
 arch/metag/lib/usercopy.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c
index b4eb1f17069f..a6ced9691ddb 100644
--- a/arch/metag/lib/usercopy.c
+++ b/arch/metag/lib/usercopy.c
@@ -717,6 +717,8 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
 	if ((unsigned long) src & 1) {
 		__asm_copy_from_user_1(dst, src, retn);
 		n--;
+		if (retn)
+			goto copy_exception_bytes;
 	}
 	if ((unsigned long) dst & 1) {
 		/* Worst case - byte copy */
@@ -730,6 +732,8 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
 	if (((unsigned long) src & 2) && n >= 2) {
 		__asm_copy_from_user_2(dst, src, retn);
 		n -= 2;
+		if (retn)
+			goto copy_exception_bytes;
 	}
 	if ((unsigned long) dst & 2) {
 		/* Second worst case - word copy */
@@ -741,12 +745,6 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
 		}
 	}
 
-	/* We only need one check after the unalignment-adjustments,
-	   because if both adjustments were done, either both or
-	   neither reference had an exception.  */
-	if (retn != 0)
-		goto copy_exception_bytes;
-
 #ifdef USE_RAPF
 	/* 64 bit copy loop */
 	if (!(((unsigned long) src | (unsigned long) dst) & 7)) {

From fb8ea062a8f2e85256e13f55696c5c5f0dfdcc8b Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Fri, 31 Mar 2017 13:35:01 +0100
Subject: [PATCH 235/410] metag/usercopy: Add early abort to copy_to_user

When copying to userland on Meta, if any faults are encountered
immediately abort the copy instead of continuing on and repeatedly
faulting, and worse potentially copying further bytes successfully to
subsequent valid pages.

Fixes: 373cd784d0fc ("metag: Memory handling")
Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-metag@vger.kernel.org
Cc: stable@vger.kernel.org
---
 arch/metag/lib/usercopy.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c
index a6ced9691ddb..714d8562aa20 100644
--- a/arch/metag/lib/usercopy.c
+++ b/arch/metag/lib/usercopy.c
@@ -538,23 +538,31 @@ unsigned long __copy_user(void __user *pdst, const void *psrc,
 	if ((unsigned long) src & 1) {
 		__asm_copy_to_user_1(dst, src, retn);
 		n--;
+		if (retn)
+			return retn + n;
 	}
 	if ((unsigned long) dst & 1) {
 		/* Worst case - byte copy */
 		while (n > 0) {
 			__asm_copy_to_user_1(dst, src, retn);
 			n--;
+			if (retn)
+				return retn + n;
 		}
 	}
 	if (((unsigned long) src & 2) && n >= 2) {
 		__asm_copy_to_user_2(dst, src, retn);
 		n -= 2;
+		if (retn)
+			return retn + n;
 	}
 	if ((unsigned long) dst & 2) {
 		/* Second worst case - word copy */
 		while (n >= 2) {
 			__asm_copy_to_user_2(dst, src, retn);
 			n -= 2;
+			if (retn)
+				return retn + n;
 		}
 	}
 
@@ -569,6 +577,8 @@ unsigned long __copy_user(void __user *pdst, const void *psrc,
 		while (n >= 8) {
 			__asm_copy_to_user_8x64(dst, src, retn);
 			n -= 8;
+			if (retn)
+				return retn + n;
 		}
 	}
 	if (n >= RAPF_MIN_BUF_SIZE) {
@@ -581,6 +591,8 @@ unsigned long __copy_user(void __user *pdst, const void *psrc,
 		while (n >= 8) {
 			__asm_copy_to_user_8x64(dst, src, retn);
 			n -= 8;
+			if (retn)
+				return retn + n;
 		}
 	}
 #endif
@@ -588,11 +600,15 @@ unsigned long __copy_user(void __user *pdst, const void *psrc,
 	while (n >= 16) {
 		__asm_copy_to_user_16(dst, src, retn);
 		n -= 16;
+		if (retn)
+			return retn + n;
 	}
 
 	while (n >= 4) {
 		__asm_copy_to_user_4(dst, src, retn);
 		n -= 4;
+		if (retn)
+			return retn + n;
 	}
 
 	switch (n) {
@@ -609,6 +625,10 @@ unsigned long __copy_user(void __user *pdst, const void *psrc,
 		break;
 	}
 
+	/*
+	 * If we get here, retn correctly reflects the number of failing
+	 * bytes.
+	 */
 	return retn;
 }
 EXPORT_SYMBOL(__copy_user);

From 3ebfdf082184d04f6e73b30cd9446613dc7f8c02 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 4 Apr 2017 13:39:55 +0800
Subject: [PATCH 236/410] sctp: get sock from transport in
 sctp_transport_update_pmtu

This patch is almost to revert commit 02f3d4ce9e81 ("sctp: Adjust PMTU
updates to accomodate route invalidation."). As t->asoc can't be NULL
in sctp_transport_update_pmtu, it could get sk from asoc, and no need
to pass sk into that function.

It is also to remove some duplicated codes from that function.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h    |  5 ++---
 include/net/sctp/structs.h |  6 +++---
 net/sctp/associola.c       |  6 +++---
 net/sctp/input.c           |  4 ++--
 net/sctp/output.c          |  4 ++--
 net/sctp/socket.c          |  6 +++---
 net/sctp/transport.c       | 19 +++++++------------
 7 files changed, 22 insertions(+), 28 deletions(-)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index d75caa7a629b..069582ee5d7f 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -448,10 +448,9 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu)
 	return frag;
 }
 
-static inline void sctp_assoc_pending_pmtu(struct sock *sk, struct sctp_association *asoc)
+static inline void sctp_assoc_pending_pmtu(struct sctp_association *asoc)
 {
-
-	sctp_assoc_sync_pmtu(sk, asoc);
+	sctp_assoc_sync_pmtu(asoc);
 	asoc->pmtu_pending = 0;
 }
 
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index a127b7c2c3c9..138f8615acf0 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -952,8 +952,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *, sctp_lower_cwnd_t);
 void sctp_transport_burst_limited(struct sctp_transport *);
 void sctp_transport_burst_reset(struct sctp_transport *);
 unsigned long sctp_transport_timeout(struct sctp_transport *);
-void sctp_transport_reset(struct sctp_transport *);
-void sctp_transport_update_pmtu(struct sock *, struct sctp_transport *, u32);
+void sctp_transport_reset(struct sctp_transport *t);
+void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
 void sctp_transport_immediate_rtx(struct sctp_transport *);
 void sctp_transport_dst_release(struct sctp_transport *t);
 void sctp_transport_dst_confirm(struct sctp_transport *t);
@@ -1954,7 +1954,7 @@ void sctp_assoc_update(struct sctp_association *old,
 
 __u32 sctp_association_get_next_tsn(struct sctp_association *);
 
-void sctp_assoc_sync_pmtu(struct sock *, struct sctp_association *);
+void sctp_assoc_sync_pmtu(struct sctp_association *asoc);
 void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int);
 void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int);
 void sctp_assoc_set_primary(struct sctp_association *,
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 0b26df5f6188..a9708da28eb5 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1412,7 +1412,7 @@ sctp_assoc_choose_alter_transport(struct sctp_association *asoc,
 /* Update the association's pmtu and frag_point by going through all the
  * transports. This routine is called when a transport's PMTU has changed.
  */
-void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc)
+void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
 {
 	struct sctp_transport *t;
 	__u32 pmtu = 0;
@@ -1424,8 +1424,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc)
 	list_for_each_entry(t, &asoc->peer.transport_addr_list,
 				transports) {
 		if (t->pmtu_pending && t->dst) {
-			sctp_transport_update_pmtu(sk, t,
-						   SCTP_TRUNC4(dst_mtu(t->dst)));
+			sctp_transport_update_pmtu(
+					t, SCTP_TRUNC4(dst_mtu(t->dst)));
 			t->pmtu_pending = 0;
 		}
 		if (!pmtu || (t->pathmtu < pmtu))
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 2a28ab20487f..0e06a278d2a9 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -401,10 +401,10 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
 
 	if (t->param_flags & SPP_PMTUD_ENABLE) {
 		/* Update transports view of the MTU */
-		sctp_transport_update_pmtu(sk, t, pmtu);
+		sctp_transport_update_pmtu(t, pmtu);
 
 		/* Update association pmtu. */
-		sctp_assoc_sync_pmtu(sk, asoc);
+		sctp_assoc_sync_pmtu(asoc);
 	}
 
 	/* Retransmit with the new pmtu setting.
diff --git a/net/sctp/output.c b/net/sctp/output.c
index ec4d50a38713..1409a875ad8e 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -105,10 +105,10 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
 	if (!sctp_transport_dst_check(tp)) {
 		sctp_transport_route(tp, NULL, sctp_sk(sk));
 		if (asoc->param_flags & SPP_PMTUD_ENABLE)
-			sctp_assoc_sync_pmtu(sk, asoc);
+			sctp_assoc_sync_pmtu(asoc);
 	} else if (!sctp_transport_pmtu_check(tp)) {
 		if (asoc->param_flags & SPP_PMTUD_ENABLE)
-			sctp_assoc_sync_pmtu(sk, asoc);
+			sctp_assoc_sync_pmtu(asoc);
 	}
 
 	/* If there a is a prepend chunk stick it on the list before
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 12fbae2c1002..c1401f43d40f 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1907,7 +1907,7 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
 	}
 
 	if (asoc->pmtu_pending)
-		sctp_assoc_pending_pmtu(sk, asoc);
+		sctp_assoc_pending_pmtu(asoc);
 
 	/* If fragmentation is disabled and the message length exceeds the
 	 * association fragmentation point, return EMSGSIZE.  The I-D
@@ -2435,7 +2435,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 	if ((params->spp_flags & SPP_PMTUD_DISABLE) && params->spp_pathmtu) {
 		if (trans) {
 			trans->pathmtu = params->spp_pathmtu;
-			sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc);
+			sctp_assoc_sync_pmtu(asoc);
 		} else if (asoc) {
 			asoc->pathmtu = params->spp_pathmtu;
 		} else {
@@ -2451,7 +2451,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 				(trans->param_flags & ~SPP_PMTUD) | pmtud_change;
 			if (update) {
 				sctp_transport_pmtu(trans, sctp_opt2sk(sp));
-				sctp_assoc_sync_pmtu(sctp_opt2sk(sp), asoc);
+				sctp_assoc_sync_pmtu(asoc);
 			}
 		} else if (asoc) {
 			asoc->param_flags =
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 3379668af368..721eeebfcd8a 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -251,14 +251,13 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
 		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
 }
 
-void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 pmtu)
+void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
 {
-	struct dst_entry *dst;
+	struct dst_entry *dst = sctp_transport_dst_check(t);
 
 	if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
 		pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
-			__func__, pmtu,
-			SCTP_DEFAULT_MINSEGMENT);
+			__func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
 		/* Use default minimum segment size and disable
 		 * pmtu discovery on this transport.
 		 */
@@ -267,17 +266,13 @@ void sctp_transport_update_pmtu(struct sock *sk, struct sctp_transport *t, u32 p
 		t->pathmtu = pmtu;
 	}
 
-	dst = sctp_transport_dst_check(t);
-	if (!dst)
-		t->af_specific->get_dst(t, &t->saddr, &t->fl, sk);
-
 	if (dst) {
-		dst->ops->update_pmtu(dst, sk, NULL, pmtu);
-
+		dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu);
 		dst = sctp_transport_dst_check(t);
-		if (!dst)
-			t->af_specific->get_dst(t, &t->saddr, &t->fl, sk);
 	}
+
+	if (!dst)
+		t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk);
 }
 
 /* Caches the dst entry and source address for a transport's destination

From 563ddc1076109f2b3f88e6d355eab7b6fd4662cb Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Fri, 31 Mar 2017 11:14:02 +0100
Subject: [PATCH 237/410] metag/usercopy: Zero rest of buffer from
 copy_from_user

Currently we try to zero the destination for a failed read from userland
in fixup code in the usercopy.c macros. The rest of the destination
buffer is then zeroed from __copy_user_zeroing(), which is used for both
copy_from_user() and __copy_from_user().

Unfortunately we fail to zero in the fixup code as D1Ar1 is set to 0
before the fixup code entry labels, and __copy_from_user() shouldn't even
be zeroing the rest of the buffer.

Move the zeroing out into copy_from_user() and rename
__copy_user_zeroing() to raw_copy_from_user() since it no longer does
any zeroing. This also conveniently matches the name needed for
RAW_COPY_USER support in a later patch.

Fixes: 373cd784d0fc ("metag: Memory handling")
Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-metag@vger.kernel.org
Cc: stable@vger.kernel.org
---
 arch/metag/include/asm/uaccess.h | 15 +++++----
 arch/metag/lib/usercopy.c        | 57 ++++++++++----------------------
 2 files changed, 26 insertions(+), 46 deletions(-)

diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h
index 273e61225c27..07238b39638c 100644
--- a/arch/metag/include/asm/uaccess.h
+++ b/arch/metag/include/asm/uaccess.h
@@ -197,20 +197,21 @@ extern long __must_check strnlen_user(const char __user *src, long count);
 
 #define strlen_user(str) strnlen_user(str, 32767)
 
-extern unsigned long __must_check __copy_user_zeroing(void *to,
-						      const void __user *from,
-						      unsigned long n);
+extern unsigned long raw_copy_from_user(void *to, const void __user *from,
+					unsigned long n);
 
 static inline unsigned long
 copy_from_user(void *to, const void __user *from, unsigned long n)
 {
+	unsigned long res = n;
 	if (likely(access_ok(VERIFY_READ, from, n)))
-		return __copy_user_zeroing(to, from, n);
-	memset(to, 0, n);
-	return n;
+		res = raw_copy_from_user(to, from, n);
+	if (unlikely(res))
+		memset(to + (n - res), 0, res);
+	return res;
 }
 
-#define __copy_from_user(to, from, n) __copy_user_zeroing(to, from, n)
+#define __copy_from_user(to, from, n) raw_copy_from_user(to, from, n)
 #define __copy_from_user_inatomic __copy_from_user
 
 extern unsigned long __must_check __copy_user(void __user *to,
diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c
index 714d8562aa20..e1d553872fd7 100644
--- a/arch/metag/lib/usercopy.c
+++ b/arch/metag/lib/usercopy.c
@@ -29,7 +29,6 @@
 		COPY						 \
 		"1:\n"						 \
 		"	.section .fixup,\"ax\"\n"		 \
-		"	MOV D1Ar1,#0\n"				 \
 		FIXUP						 \
 		"	MOVT    D1Ar1,#HI(1b)\n"		 \
 		"	JUMP    D1Ar1,#LO(1b)\n"		 \
@@ -637,16 +636,14 @@ EXPORT_SYMBOL(__copy_user);
 	__asm_copy_user_cont(to, from, ret,	\
 		"	GETB D1Ar1,[%1++]\n"	\
 		"2:	SETB [%0++],D1Ar1\n",	\
-		"3:	ADD  %2,%2,#1\n"	\
-		"	SETB [%0++],D1Ar1\n",	\
+		"3:	ADD  %2,%2,#1\n",	\
 		"	.long 2b,3b\n")
 
 #define __asm_copy_from_user_2x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
 	__asm_copy_user_cont(to, from, ret,		\
 		"	GETW D1Ar1,[%1++]\n"		\
 		"2:	SETW [%0++],D1Ar1\n" COPY,	\
-		"3:	ADD  %2,%2,#2\n"		\
-		"	SETW [%0++],D1Ar1\n" FIXUP,	\
+		"3:	ADD  %2,%2,#2\n" FIXUP,		\
 		"	.long 2b,3b\n" TENTRY)
 
 #define __asm_copy_from_user_2(to, from, ret) \
@@ -656,32 +653,26 @@ EXPORT_SYMBOL(__copy_user);
 	__asm_copy_from_user_2x_cont(to, from, ret,	\
 		"	GETB D1Ar1,[%1++]\n"		\
 		"4:	SETB [%0++],D1Ar1\n",		\
-		"5:	ADD  %2,%2,#1\n"		\
-		"	SETB [%0++],D1Ar1\n",		\
+		"5:	ADD  %2,%2,#1\n",		\
 		"	.long 4b,5b\n")
 
 #define __asm_copy_from_user_4x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
 	__asm_copy_user_cont(to, from, ret,		\
 		"	GETD D1Ar1,[%1++]\n"		\
 		"2:	SETD [%0++],D1Ar1\n" COPY,	\
-		"3:	ADD  %2,%2,#4\n"		\
-		"	SETD [%0++],D1Ar1\n" FIXUP,	\
+		"3:	ADD  %2,%2,#4\n" FIXUP,		\
 		"	.long 2b,3b\n" TENTRY)
 
 #define __asm_copy_from_user_4(to, from, ret) \
 	__asm_copy_from_user_4x_cont(to, from, ret, "", "", "")
 
-
 #define __asm_copy_from_user_8x64(to, from, ret) \
 	asm volatile (				\
 		"	GETL D0Ar2,D1Ar1,[%1++]\n"	\
 		"2:	SETL [%0++],D0Ar2,D1Ar1\n"	\
 		"1:\n"					\
 		"	.section .fixup,\"ax\"\n"	\
-		"	MOV D1Ar1,#0\n"			\
-		"	MOV D0Ar2,#0\n"			\
 		"3:	ADD  %2,%2,#8\n"		\
-		"	SETL [%0++],D0Ar2,D1Ar1\n"	\
 		"	MOVT    D0Ar2,#HI(1b)\n"	\
 		"	JUMP    D0Ar2,#LO(1b)\n"	\
 		"	.previous\n"			\
@@ -721,11 +712,12 @@ EXPORT_SYMBOL(__copy_user);
 		"SUB	%1, %1, #4\n")
 
 
-/* Copy from user to kernel, zeroing the bytes that were inaccessible in
-   userland.  The return-value is the number of bytes that were
-   inaccessible.  */
-unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
-				  unsigned long n)
+/*
+ * Copy from user to kernel. The return-value is the number of bytes that were
+ * inaccessible.
+ */
+unsigned long raw_copy_from_user(void *pdst, const void __user *psrc,
+				 unsigned long n)
 {
 	register char *dst asm ("A0.2") = pdst;
 	register const char __user *src asm ("A1.2") = psrc;
@@ -738,7 +730,7 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
 		__asm_copy_from_user_1(dst, src, retn);
 		n--;
 		if (retn)
-			goto copy_exception_bytes;
+			return retn + n;
 	}
 	if ((unsigned long) dst & 1) {
 		/* Worst case - byte copy */
@@ -746,14 +738,14 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
 			__asm_copy_from_user_1(dst, src, retn);
 			n--;
 			if (retn)
-				goto copy_exception_bytes;
+				return retn + n;
 		}
 	}
 	if (((unsigned long) src & 2) && n >= 2) {
 		__asm_copy_from_user_2(dst, src, retn);
 		n -= 2;
 		if (retn)
-			goto copy_exception_bytes;
+			return retn + n;
 	}
 	if ((unsigned long) dst & 2) {
 		/* Second worst case - word copy */
@@ -761,7 +753,7 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
 			__asm_copy_from_user_2(dst, src, retn);
 			n -= 2;
 			if (retn)
-				goto copy_exception_bytes;
+				return retn + n;
 		}
 	}
 
@@ -777,7 +769,7 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
 			__asm_copy_from_user_8x64(dst, src, retn);
 			n -= 8;
 			if (retn)
-				goto copy_exception_bytes;
+				return retn + n;
 		}
 	}
 
@@ -793,7 +785,7 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
 			__asm_copy_from_user_8x64(dst, src, retn);
 			n -= 8;
 			if (retn)
-				goto copy_exception_bytes;
+				return retn + n;
 		}
 	}
 #endif
@@ -803,7 +795,7 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
 		n -= 4;
 
 		if (retn)
-			goto copy_exception_bytes;
+			return retn + n;
 	}
 
 	/* If we get here, there were no memory read faults.  */
@@ -829,21 +821,8 @@ unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
 	/* If we get here, retn correctly reflects the number of failing
 	   bytes.  */
 	return retn;
-
- copy_exception_bytes:
-	/* We already have "retn" bytes cleared, and need to clear the
-	   remaining "n" bytes.  A non-optimized simple byte-for-byte in-line
-	   memset is preferred here, since this isn't speed-critical code and
-	   we'd rather have this a leaf-function than calling memset.  */
-	{
-		char *endp;
-		for (endp = dst + n; dst < endp; dst++)
-			*dst = 0;
-	}
-
-	return retn + n;
 }
-EXPORT_SYMBOL(__copy_user_zeroing);
+EXPORT_SYMBOL(raw_copy_from_user);
 
 #define __asm_clear_8x64(to, ret) \
 	asm volatile (					\

From fd40eee1290ad7add7aa665e3ce6b0f9fe9734b4 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Tue, 4 Apr 2017 11:43:26 +0100
Subject: [PATCH 238/410] metag/usercopy: Set flags before ADDZ

The fixup code for the copy_to_user rapf loops reads TXStatus.LSM_STEP
to decide how far to rewind the source pointer. There is a special case
for the last execution of an MGETL/MGETD, since it leaves LSM_STEP=0
even though the number of MGETLs/MGETDs attempted was 4. This uses ADDZ
which is conditional upon the Z condition flag, but the AND instruction
which masked the TXStatus.LSM_STEP field didn't set the condition flags
based on the result.

Fix that now by using ANDS which does set the flags, and also marking
the condition codes as clobbered by the inline assembly.

Fixes: 373cd784d0fc ("metag: Memory handling")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-metag@vger.kernel.org
Cc: stable@vger.kernel.org
---
 arch/metag/lib/usercopy.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c
index e1d553872fd7..4422928a1746 100644
--- a/arch/metag/lib/usercopy.c
+++ b/arch/metag/lib/usercopy.c
@@ -315,7 +315,7 @@
 		"	.previous\n"					\
 		: "=r" (to), "=r" (from), "=r" (ret), "=d" (n)		\
 		: "0" (to), "1" (from), "2" (ret), "3" (n)		\
-		: "D1Ar1", "D0Ar2", "memory")
+		: "D1Ar1", "D0Ar2", "cc", "memory")
 
 /*	rewind 'to' and 'from'  pointers when a fault occurs
  *
@@ -341,7 +341,7 @@
 #define __asm_copy_to_user_64bit_rapf_loop(to,	from, ret, n, id)\
 	__asm_copy_user_64bit_rapf_loop(to, from, ret, n, id,		\
 		"LSR	D0Ar2, D0Ar2, #8\n"				\
-		"AND	D0Ar2, D0Ar2, #0x7\n"				\
+		"ANDS	D0Ar2, D0Ar2, #0x7\n"				\
 		"ADDZ	D0Ar2, D0Ar2, #4\n"				\
 		"SUB	D0Ar2, D0Ar2, #1\n"				\
 		"MOV	D1Ar1, #4\n"					\
@@ -486,7 +486,7 @@
 		"	.previous\n"					\
 		: "=r" (to), "=r" (from), "=r" (ret), "=d" (n)		\
 		: "0" (to), "1" (from), "2" (ret), "3" (n)		\
-		: "D1Ar1", "D0Ar2", "memory")
+		: "D1Ar1", "D0Ar2", "cc", "memory")
 
 /*	rewind 'to' and 'from'  pointers when a fault occurs
  *
@@ -512,7 +512,7 @@
 #define __asm_copy_to_user_32bit_rapf_loop(to, from, ret, n, id)\
 	__asm_copy_user_32bit_rapf_loop(to, from, ret, n, id,		\
 		"LSR	D0Ar2, D0Ar2, #8\n"				\
-		"AND	D0Ar2, D0Ar2, #0x7\n"				\
+		"ANDS	D0Ar2, D0Ar2, #0x7\n"				\
 		"ADDZ	D0Ar2, D0Ar2, #4\n"				\
 		"SUB	D0Ar2, D0Ar2, #1\n"				\
 		"MOV	D1Ar1, #4\n"					\

From 2c0b1df88b987a12d95ea1d6beaf01894f3cc725 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Mon, 3 Apr 2017 17:41:40 +0100
Subject: [PATCH 239/410] metag/usercopy: Fix src fixup in from user rapf loops

The fixup code to rewind the source pointer in
__asm_copy_from_user_{32,64}bit_rapf_loop() always rewound the source by
a single unit (4 or 8 bytes), however this is insufficient if the fault
didn't occur on the first load in the loop, as the source pointer will
have been incremented but nothing will have been stored until all 4
register [pairs] are loaded.

Read the LSM_STEP field of TXSTATUS (which is already loaded into a
register), a bit like the copy_to_user versions, to determine how many
iterations of MGET[DL] have taken place, all of which need rewinding.

Fixes: 373cd784d0fc ("metag: Memory handling")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-metag@vger.kernel.org
Cc: stable@vger.kernel.org
---
 arch/metag/lib/usercopy.c | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c
index 4422928a1746..e09c95ba028c 100644
--- a/arch/metag/lib/usercopy.c
+++ b/arch/metag/lib/usercopy.c
@@ -687,29 +687,49 @@ EXPORT_SYMBOL(__copy_user);
  *
  *	Rationale:
  *		A fault occurs while reading from user buffer, which is the
- *		source. Since the fault is at a single address, we only
- *		need to rewind by 8 bytes.
+ *		source.
  *		Since we don't write to kernel buffer until we read first,
  *		the kernel buffer is at the right state and needn't be
- *		corrected.
+ *		corrected, but the source must be rewound to the beginning of
+ *		the block, which is LSM_STEP*8 bytes.
+ *		LSM_STEP is bits 10:8 in TXSTATUS which is already read
+ *		and stored in D0Ar2
+ *
+ *		NOTE: If a fault occurs at the last operation in M{G,S}ETL
+ *			LSM_STEP will be 0. ie: we do 4 writes in our case, if
+ *			a fault happens at the 4th write, LSM_STEP will be 0
+ *			instead of 4. The code copes with that.
  */
 #define __asm_copy_from_user_64bit_rapf_loop(to, from, ret, n, id)	\
 	__asm_copy_user_64bit_rapf_loop(to, from, ret, n, id,		\
-		"SUB	%1, %1, #8\n")
+		"LSR	D0Ar2, D0Ar2, #5\n"				\
+		"ANDS	D0Ar2, D0Ar2, #0x38\n"				\
+		"ADDZ	D0Ar2, D0Ar2, #32\n"				\
+		"SUB	%1, %1, D0Ar2\n")
 
 /*	rewind 'from' pointer when a fault occurs
  *
  *	Rationale:
  *		A fault occurs while reading from user buffer, which is the
- *		source. Since the fault is at a single address, we only
- *		need to rewind by 4 bytes.
+ *		source.
  *		Since we don't write to kernel buffer until we read first,
  *		the kernel buffer is at the right state and needn't be
- *		corrected.
+ *		corrected, but the source must be rewound to the beginning of
+ *		the block, which is LSM_STEP*4 bytes.
+ *		LSM_STEP is bits 10:8 in TXSTATUS which is already read
+ *		and stored in D0Ar2
+ *
+ *		NOTE: If a fault occurs at the last operation in M{G,S}ETL
+ *			LSM_STEP will be 0. ie: we do 4 writes in our case, if
+ *			a fault happens at the 4th write, LSM_STEP will be 0
+ *			instead of 4. The code copes with that.
  */
 #define __asm_copy_from_user_32bit_rapf_loop(to, from, ret, n, id)	\
 	__asm_copy_user_32bit_rapf_loop(to, from, ret, n, id,		\
-		"SUB	%1, %1, #4\n")
+		"LSR	D0Ar2, D0Ar2, #6\n"				\
+		"ANDS	D0Ar2, D0Ar2, #0x1c\n"				\
+		"ADDZ	D0Ar2, D0Ar2, #16\n"				\
+		"SUB	%1, %1, D0Ar2\n")
 
 
 /*

From b884a190afcecdbef34ca508ea5ee88bb7c77861 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Tue, 4 Apr 2017 08:51:34 +0100
Subject: [PATCH 240/410] metag/usercopy: Add missing fixups

The rapf copy loops in the Meta usercopy code is missing some extable
entries for HTP cores with unaligned access checking enabled, where
faults occur on the instruction immediately after the faulting access.

Add the fixup labels and extable entries for these cases so that corner
case user copy failures don't cause kernel crashes.

Fixes: 373cd784d0fc ("metag: Memory handling")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: linux-metag@vger.kernel.org
Cc: stable@vger.kernel.org
---
 arch/metag/lib/usercopy.c | 84 +++++++++++++++++++++++++--------------
 1 file changed, 54 insertions(+), 30 deletions(-)

diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c
index e09c95ba028c..2792fc621088 100644
--- a/arch/metag/lib/usercopy.c
+++ b/arch/metag/lib/usercopy.c
@@ -259,27 +259,31 @@
 		"MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
 		"22:\n"							\
 		"MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"SUB	%3, %3, #32\n"					\
 		"23:\n"							\
-		"MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"SUB	%3, %3, #32\n"					\
 		"24:\n"							\
+		"MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"25:\n"							\
 		"MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"26:\n"							\
 		"SUB	%3, %3, #32\n"					\
 		"DCACHE	[%1+#-64], D0Ar6\n"				\
 		"BR	$Lloop"id"\n"					\
 									\
 		"MOV	RAPF, %1\n"					\
-		"25:\n"							\
-		"MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"26:\n"							\
-		"MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"SUB	%3, %3, #32\n"					\
 		"27:\n"							\
 		"MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
 		"28:\n"							\
 		"MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"SUB	%0, %0, #8\n"					\
 		"29:\n"							\
+		"SUB	%3, %3, #32\n"					\
+		"30:\n"							\
+		"MGETL	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"31:\n"							\
+		"MSETL	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"32:\n"							\
+		"SUB	%0, %0, #8\n"					\
+		"33:\n"							\
 		"SETL	[%0++], D0.7, D1.7\n"				\
 		"SUB	%3, %3, #32\n"					\
 		"1:"							\
@@ -311,7 +315,11 @@
 		"	.long 26b,3b\n"					\
 		"	.long 27b,3b\n"					\
 		"	.long 28b,3b\n"					\
-		"	.long 29b,4b\n"					\
+		"	.long 29b,3b\n"					\
+		"	.long 30b,3b\n"					\
+		"	.long 31b,3b\n"					\
+		"	.long 32b,3b\n"					\
+		"	.long 33b,4b\n"					\
 		"	.previous\n"					\
 		: "=r" (to), "=r" (from), "=r" (ret), "=d" (n)		\
 		: "0" (to), "1" (from), "2" (ret), "3" (n)		\
@@ -402,47 +410,55 @@
 		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
 		"22:\n"							\
 		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"SUB	%3, %3, #16\n"					\
 		"23:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"24:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
 		"SUB	%3, %3, #16\n"					\
-		"25:\n"							\
+		"24:\n"							\
 		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"26:\n"							\
+		"25:\n"							\
 		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"26:\n"							\
 		"SUB	%3, %3, #16\n"					\
 		"27:\n"							\
 		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
 		"28:\n"							\
 		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"29:\n"							\
+		"SUB	%3, %3, #16\n"					\
+		"30:\n"							\
+		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"31:\n"							\
+		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"32:\n"							\
 		"SUB	%3, %3, #16\n"					\
 		"DCACHE	[%1+#-64], D0Ar6\n"				\
 		"BR	$Lloop"id"\n"					\
 									\
 		"MOV	RAPF, %1\n"					\
-		"29:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"30:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"SUB	%3, %3, #16\n"					\
-		"31:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
-		"32:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"SUB	%3, %3, #16\n"					\
 		"33:\n"							\
 		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
 		"34:\n"							\
 		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"SUB	%3, %3, #16\n"					\
 		"35:\n"							\
-		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"SUB	%3, %3, #16\n"					\
 		"36:\n"							\
-		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
-		"SUB	%0, %0, #4\n"					\
+		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
 		"37:\n"							\
+		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"38:\n"							\
+		"SUB	%3, %3, #16\n"					\
+		"39:\n"							\
+		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"40:\n"							\
+		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"41:\n"							\
+		"SUB	%3, %3, #16\n"					\
+		"42:\n"							\
+		"MGETD	D0FrT, D0.5, D0.6, D0.7, [%1++]\n"		\
+		"43:\n"							\
+		"MSETD	[%0++], D0FrT, D0.5, D0.6, D0.7\n"		\
+		"44:\n"							\
+		"SUB	%0, %0, #4\n"					\
+		"45:\n"							\
 		"SETD	[%0++], D0.7\n"					\
 		"SUB	%3, %3, #16\n"					\
 		"1:"							\
@@ -482,7 +498,15 @@
 		"	.long 34b,3b\n"					\
 		"	.long 35b,3b\n"					\
 		"	.long 36b,3b\n"					\
-		"	.long 37b,4b\n"					\
+		"	.long 37b,3b\n"					\
+		"	.long 38b,3b\n"					\
+		"	.long 39b,3b\n"					\
+		"	.long 40b,3b\n"					\
+		"	.long 41b,3b\n"					\
+		"	.long 42b,3b\n"					\
+		"	.long 43b,3b\n"					\
+		"	.long 44b,3b\n"					\
+		"	.long 45b,4b\n"					\
 		"	.previous\n"					\
 		: "=r" (to), "=r" (from), "=r" (ret), "=d" (n)		\
 		: "0" (to), "1" (from), "2" (ret), "3" (n)		\

From ecde8f36f8a05a023b9d026e9094571aab421d36 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Tue, 4 Apr 2017 14:15:39 -0700
Subject: [PATCH 241/410] tcp: fix lost retransmit SNMP under-counting

The lost retransmit SNMP stat is under-counting retransmission
that uses segment offloading. This patch fixes that so all
retransmission related SNMP counters are consistent.

Fixes: 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time")
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_recovery.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 4ecb38ae8504..d8acbd9f477a 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -12,7 +12,8 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
 		/* Account for retransmits that are lost again */
 		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 		tp->retrans_out -= tcp_skb_pcount(skb);
-		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
+		NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT,
+			      tcp_skb_pcount(skb));
 	}
 }
 

From 2d2517ee314ef1de0517f74d06c2825fbf597ba3 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Tue, 4 Apr 2017 14:15:40 -0700
Subject: [PATCH 242/410] tcp: fix reordering SNMP under-counting

Currently the reordering SNMP counters only increase if a connection
sees a higher degree then it has previously seen. It ignores if the
reordering degree is not greater than the default system threshold.
This significantly under-counts the number of reordering events
and falsely convey that reordering is rare on the network.

This patch properly and faithfully records the number of reordering
events detected by the TCP stack, just like the comment says "this
exciting event is worth to be remembered". Note that even so TCP
still under-estimate the actual reordering events because TCP
requires TS options or certain packet sequences to detect reordering
(i.e. ACKing never-retransmitted sequence in recovery or disordered
 state).

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 97ac6776e47d..2c1f59386a7b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -878,22 +878,11 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
 				  const int ts)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	if (metric > tp->reordering) {
-		int mib_idx;
+	int mib_idx;
 
+	if (metric > tp->reordering) {
 		tp->reordering = min(sysctl_tcp_max_reordering, metric);
 
-		/* This exciting event is worth to be remembered. 8) */
-		if (ts)
-			mib_idx = LINUX_MIB_TCPTSREORDER;
-		else if (tcp_is_reno(tp))
-			mib_idx = LINUX_MIB_TCPRENOREORDER;
-		else if (tcp_is_fack(tp))
-			mib_idx = LINUX_MIB_TCPFACKREORDER;
-		else
-			mib_idx = LINUX_MIB_TCPSACKREORDER;
-
-		NET_INC_STATS(sock_net(sk), mib_idx);
 #if FASTRETRANS_DEBUG > 1
 		pr_debug("Disorder%d %d %u f%u s%u rr%d\n",
 			 tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state,
@@ -906,6 +895,18 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
 	}
 
 	tp->rack.reord = 1;
+
+	/* This exciting event is worth to be remembered. 8) */
+	if (ts)
+		mib_idx = LINUX_MIB_TCPTSREORDER;
+	else if (tcp_is_reno(tp))
+		mib_idx = LINUX_MIB_TCPRENOREORDER;
+	else if (tcp_is_fack(tp))
+		mib_idx = LINUX_MIB_TCPFACKREORDER;
+	else
+		mib_idx = LINUX_MIB_TCPSACKREORDER;
+
+	NET_INC_STATS(sock_net(sk), mib_idx);
 }
 
 /* This must be called before lost_out is incremented */

From c383bdd14f91562babd269aa7c36b46fee7b6c75 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 4 Apr 2017 15:56:55 -0700
Subject: [PATCH 243/410] nfp: fix potential use after free on xdp prog

We should unregister the net_device first, before we give back
our reference on xdp_prog.  Otherwise xdp_prog may be freed
before .ndo_stop() disabled the datapath.  Found by code inspection.

Fixes: ecd63a0217d5 ("nfp: add XDP support in the driver")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 9179a99563af..a41377e26c07 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -3275,9 +3275,10 @@ void nfp_net_netdev_clean(struct net_device *netdev)
 {
 	struct nfp_net *nn = netdev_priv(netdev);
 
+	unregister_netdev(nn->netdev);
+
 	if (nn->xdp_prog)
 		bpf_prog_put(nn->xdp_prog);
 	if (nn->bpf_offload_xdp)
 		nfp_net_xdp_offload(nn, NULL);
-	unregister_netdev(nn->netdev);
 }

From a34f83639490a5cc11a9d5c1b3773d4b6eb69a9e Mon Sep 17 00:00:00 2001
From: Min He <min.he@intel.com>
Date: Thu, 6 Apr 2017 11:01:45 +0800
Subject: [PATCH 244/410] drm/i915/gvt: set the correct default value of CTX
 STATUS PTR

Fix wrong initial csb read pointer value. This fixes the random
engine timeout issue in guest when guest boots up.

Fixes: 8453d674ae7e ("drm/i915/gvt: vGPU execlist virtualization")
Cc: stable@vger.kernel.org # v4.10+
Signed-off-by: Min He <min.he@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/execlist.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index f1f426a97aa9..d186c157f65f 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -775,7 +775,8 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
 			_EL_OFFSET_STATUS_PTR);
 
 	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
-	ctx_status_ptr.read_ptr = ctx_status_ptr.write_ptr = 0x7;
+	ctx_status_ptr.read_ptr = 0;
+	ctx_status_ptr.write_ptr = 0x7;
 	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
 }
 

From 83bce9c2baa51e439480a713119a73d3c8b61083 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 18 Mar 2017 21:53:05 -0400
Subject: [PATCH 245/410] drm/nouveau/mpeg: mthd returns true on success now

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Fixes: 590801c1a3 ("drm/nouveau/mpeg: remove dependence on namedb/engctx lookup")
Cc: stable@vger.kernel.org # v4.3+
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c | 2 +-
 drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c
index 003ac915eaad..8a8895246d26 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c
@@ -198,7 +198,7 @@ nv31_mpeg_intr(struct nvkm_engine *engine)
 		}
 
 		if (type == 0x00000010) {
-			if (!nv31_mpeg_mthd(mpeg, mthd, data))
+			if (nv31_mpeg_mthd(mpeg, mthd, data))
 				show &= ~0x01000000;
 		}
 	}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c
index e536f37e24b0..c3cf02ed468e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c
@@ -172,7 +172,7 @@ nv44_mpeg_intr(struct nvkm_engine *engine)
 		}
 
 		if (type == 0x00000010) {
-			if (!nv44_mpeg_mthd(subdev->device, mthd, data))
+			if (nv44_mpeg_mthd(subdev->device, mthd, data))
 				show &= ~0x01000000;
 		}
 	}

From f94773b9f5ecd1df7c88c2e921924dd41d2020cc Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 18 Mar 2017 16:23:10 -0400
Subject: [PATCH 246/410] drm/nouveau/mmu/nv4a: use nv04 mmu rather than the
 nv44 one

The NV4A (aka NV44A) is an oddity in the family. It only comes in AGP
and PCI varieties, rather than a core PCIE chip with a bridge for
AGP/PCI as necessary. As a result, it appears that the MMU is also
non-functional. For AGP cards, the vast majority of the NV4A lineup,
this worked out since we force AGP cards to use the nv04 mmu. However
for PCI variants, this did not work.

Switching to the NV04 MMU makes it work like a charm. Thanks to mwk for
the suggestion. This should be a no-op for NV4A AGP boards, as they were
using it already.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70388
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: stable@vger.kernel.org
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 273562dd6bbd..0fc41db34522 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -714,7 +714,7 @@ nv4a_chipset = {
 	.i2c = nv04_i2c_new,
 	.imem = nv40_instmem_new,
 	.mc = nv44_mc_new,
-	.mmu = nv44_mmu_new,
+	.mmu = nv04_mmu_new,
 	.pci = nv40_pci_new,
 	.therm = nv40_therm_new,
 	.timer = nv41_timer_new,

From d639fbcc102745187f747a21bdcffcf628e174c8 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Wed, 5 Apr 2017 09:12:54 +1000
Subject: [PATCH 247/410] drm/nouveau/kms/nv50: fix setting of
 HeadSetRasterVertBlankDmi method

Cc: stable@vger.kernel.org	[4.10+]
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nv50_display.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 0b4440ffbeae..4405ca1fb3e2 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -2036,6 +2036,7 @@ nv50_head_atomic_check_mode(struct nv50_head *head, struct nv50_head_atom *asyh)
 	u32 vbackp  = (mode->vtotal - mode->vsync_end) * vscan / ilace;
 	u32 hfrontp =  mode->hsync_start - mode->hdisplay;
 	u32 vfrontp = (mode->vsync_start - mode->vdisplay) * vscan / ilace;
+	u32 blankus;
 	struct nv50_head_mode *m = &asyh->mode;
 
 	m->h.active = mode->htotal;
@@ -2049,9 +2050,10 @@ nv50_head_atomic_check_mode(struct nv50_head *head, struct nv50_head_atom *asyh)
 	m->v.blanks = m->v.active - vfrontp - 1;
 
 	/*XXX: Safe underestimate, even "0" works */
-	m->v.blankus = (m->v.active - mode->vdisplay - 2) * m->h.active;
-	m->v.blankus *= 1000;
-	m->v.blankus /= mode->clock;
+	blankus = (m->v.active - mode->vdisplay - 2) * m->h.active;
+	blankus *= 1000;
+	blankus /= mode->clock;
+	m->v.blankus = blankus;
 
 	if (mode->flags & DRM_MODE_FLAG_INTERLACE) {
 		m->v.blank2e =  m->v.active + m->v.synce + vbackp;

From 2907e8670b6ef253bffb33bf47fd2182969cf2a0 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Wed, 5 Apr 2017 18:16:14 +1000
Subject: [PATCH 248/410] drm/nouveau/kms/nv50: fix double dma_fence_put() when
 destroying plane state

When the atomic support was added to nouveau, the DRM core did not do this.

However, later in the same merge window, a commit (drm/fence: add in-fences
support) was merged that added it, leading to use-after-frees of the fence
object.

Cc: stable@vger.kernel.org	[4.10+]
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nv50_display.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 4405ca1fb3e2..a9182d5e6011 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -995,7 +995,6 @@ nv50_wndw_atomic_destroy_state(struct drm_plane *plane,
 {
 	struct nv50_wndw_atom *asyw = nv50_wndw_atom(state);
 	__drm_atomic_helper_plane_destroy_state(&asyw->state);
-	dma_fence_put(asyw->state.fence);
 	kfree(asyw);
 }
 
@@ -1007,7 +1006,6 @@ nv50_wndw_atomic_duplicate_state(struct drm_plane *plane)
 	if (!(asyw = kmalloc(sizeof(*asyw), GFP_KERNEL)))
 		return NULL;
 	__drm_atomic_helper_plane_duplicate_state(plane, &asyw->state);
-	asyw->state.fence = NULL;
 	asyw->interval = 1;
 	asyw->sema = armw->sema;
 	asyw->ntfy = armw->ntfy;

From da2ba564a6dcf46df4f828624ff55531ff11d5b0 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 6 Apr 2017 10:35:26 +1000
Subject: [PATCH 249/410] drm/nouveau: initial support (display-only) for GP107

Forked from GP106 implementation.

Split out from commit enabling secboot/gr support so that it can be
added to earlier kernels.

Cc: stable@vger.kernel.org	[4.10+]
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../gpu/drm/nouveau/nvkm/engine/device/base.c | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 0fc41db34522..3b86a7399567 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2271,6 +2271,35 @@ nv136_chipset = {
 	.fifo = gp100_fifo_new,
 };
 
+static const struct nvkm_device_chip
+nv137_chipset = {
+	.name = "GP107",
+	.bar = gf100_bar_new,
+	.bios = nvkm_bios_new,
+	.bus = gf100_bus_new,
+	.devinit = gm200_devinit_new,
+	.fb = gp102_fb_new,
+	.fuse = gm107_fuse_new,
+	.gpio = gk104_gpio_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.imem = nv50_instmem_new,
+	.ltc = gp100_ltc_new,
+	.mc = gp100_mc_new,
+	.mmu = gf100_mmu_new,
+	.pci = gp100_pci_new,
+	.pmu = gp102_pmu_new,
+	.timer = gk20a_timer_new,
+	.top = gk104_top_new,
+	.ce[0] = gp102_ce_new,
+	.ce[1] = gp102_ce_new,
+	.ce[2] = gp102_ce_new,
+	.ce[3] = gp102_ce_new,
+	.disp = gp102_disp_new,
+	.dma = gf119_dma_new,
+	.fifo = gp100_fifo_new,
+};
+
 static int
 nvkm_device_event_ctor(struct nvkm_object *object, void *data, u32 size,
 		       struct nvkm_notify *notify)
@@ -2708,6 +2737,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 		case 0x132: device->chip = &nv132_chipset; break;
 		case 0x134: device->chip = &nv134_chipset; break;
 		case 0x136: device->chip = &nv136_chipset; break;
+		case 0x137: device->chip = &nv137_chipset; break;
 		default:
 			nvdev_error(device, "unknown chipset (%08x)\n", boot0);
 			goto done;

From abd80dcbc400fac878202136645e9acf0e0bfbd9 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 17 Mar 2017 23:41:14 +0300
Subject: [PATCH 250/410] KVM: PPC: Book3S HV: Check for kmalloc errors in
 ioctl

kzalloc() won't actually fail because sizeof(*resize) is small, but
static checkers complain.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 8c68145ba1bd..710e491206ed 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1487,6 +1487,10 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
 	/* start new resize */
 
 	resize = kzalloc(sizeof(*resize), GFP_KERNEL);
+	if (!resize) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	resize->order = shift;
 	resize->kvm = kvm;
 	INIT_WORK(&resize->work, resize_hpt_prepare_work);

From 0d98479738b950e30bb4f782d60099d44076ad67 Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.io>
Date: Wed, 5 Apr 2017 22:30:34 +0800
Subject: [PATCH 251/410] arm64: allwinner: a64: add pmu0 regs for USB PHY

The USB PHY in A64 has a "pmu0" region, which controls the EHCI/OHCI
controller pair that can be connected to the PHY0.

Add the MMIO region for PHY node.

Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
---
 arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
index 1c64ea2d23f9..0565779e66fa 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
@@ -179,8 +179,10 @@
 		usbphy: phy@01c19400 {
 			compatible = "allwinner,sun50i-a64-usb-phy";
 			reg = <0x01c19400 0x14>,
+			      <0x01c1a800 0x4>,
 			      <0x01c1b800 0x4>;
 			reg-names = "phy_ctrl",
+				    "pmu0",
 				    "pmu1";
 			clocks = <&ccu CLK_USB_PHY0>,
 				 <&ccu CLK_USB_PHY1>;

From 3c1460e934f371bf91963b49a2cf173ee73c2cae Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 6 Apr 2017 14:54:00 +0300
Subject: [PATCH 252/410] pwm: lpss: Split Tangier configuration

As a preparation for special treatment for Broxton we split Tangier
configuration.

Fixes: b89b4b7a3d0a ("pwm: lpss: pci: Enable PWM module on Intel Edison")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-lpss-pci.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/pwm/pwm-lpss-pci.c b/drivers/pwm/pwm-lpss-pci.c
index 053088b9b66e..073dfb2337e0 100644
--- a/drivers/pwm/pwm-lpss-pci.c
+++ b/drivers/pwm/pwm-lpss-pci.c
@@ -38,6 +38,13 @@ static const struct pwm_lpss_boardinfo pwm_lpss_bxt_info = {
 	.base_unit_bits = 22,
 };
 
+/* Tangier */
+static const struct pwm_lpss_boardinfo pwm_lpss_tng_info = {
+	.clk_rate = 19200000,
+	.npwm = 4,
+	.base_unit_bits = 22,
+};
+
 static int pwm_lpss_probe_pci(struct pci_dev *pdev,
 			      const struct pci_device_id *id)
 {
@@ -97,7 +104,7 @@ static const struct pci_device_id pwm_lpss_pci_ids[] = {
 	{ PCI_VDEVICE(INTEL, 0x0ac8), (unsigned long)&pwm_lpss_bxt_info},
 	{ PCI_VDEVICE(INTEL, 0x0f08), (unsigned long)&pwm_lpss_byt_info},
 	{ PCI_VDEVICE(INTEL, 0x0f09), (unsigned long)&pwm_lpss_byt_info},
-	{ PCI_VDEVICE(INTEL, 0x11a5), (unsigned long)&pwm_lpss_bxt_info},
+	{ PCI_VDEVICE(INTEL, 0x11a5), (unsigned long)&pwm_lpss_tng_info},
 	{ PCI_VDEVICE(INTEL, 0x1ac8), (unsigned long)&pwm_lpss_bxt_info},
 	{ PCI_VDEVICE(INTEL, 0x2288), (unsigned long)&pwm_lpss_bsw_info},
 	{ PCI_VDEVICE(INTEL, 0x2289), (unsigned long)&pwm_lpss_bsw_info},

From b997e3edca4fb4ab7732ab7240c545da0c360a44 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 6 Apr 2017 14:54:01 +0300
Subject: [PATCH 253/410] pwm: lpss: Set enable-bit before waiting for
 update-bit to go low

At least on cherrytrail, the update bit will never go low when the
enabled bit is not set.

This causes the backlight on my cube iwork8 air tablet to never turn on
again after being turned off because in the pwm_lpss_apply enable path
pwm_lpss_update will fail causing an error exit and the enable-bit to
never get set. Any following pwm_lpss_apply calls will fail the
pwm_lpss_is_updating check.

Since the docs say that the update bit should be set before the
enable-bit, split pwm_lpss_update into setting the update-bit and
pwm_lpss_wait_for_update, and move the pwm_lpss_wait_for_update call
in the enable path to after setting the enable-bit.

Fixes: 10d56a4 ("pwm: lpss: Avoid reconfiguring while UPDATE bit...")
Cc: Ilkka Koskinen <ilkka.koskinen@intel.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-lpss-pci.c      |  1 +
 drivers/pwm/pwm-lpss-platform.c |  1 +
 drivers/pwm/pwm-lpss.c          | 19 +++++++++++++------
 drivers/pwm/pwm-lpss.h          |  1 +
 4 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/pwm/pwm-lpss-pci.c b/drivers/pwm/pwm-lpss-pci.c
index 073dfb2337e0..c1527cb645be 100644
--- a/drivers/pwm/pwm-lpss-pci.c
+++ b/drivers/pwm/pwm-lpss-pci.c
@@ -36,6 +36,7 @@ static const struct pwm_lpss_boardinfo pwm_lpss_bxt_info = {
 	.clk_rate = 19200000,
 	.npwm = 4,
 	.base_unit_bits = 22,
+	.bypass = true,
 };
 
 /* Tangier */
diff --git a/drivers/pwm/pwm-lpss-platform.c b/drivers/pwm/pwm-lpss-platform.c
index b22b6fdadb9a..5d6ed1507d29 100644
--- a/drivers/pwm/pwm-lpss-platform.c
+++ b/drivers/pwm/pwm-lpss-platform.c
@@ -37,6 +37,7 @@ static const struct pwm_lpss_boardinfo pwm_lpss_bxt_info = {
 	.clk_rate = 19200000,
 	.npwm = 4,
 	.base_unit_bits = 22,
+	.bypass = true,
 };
 
 static int pwm_lpss_probe_platform(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c
index 689d2c1cbead..8db0d40ccacd 100644
--- a/drivers/pwm/pwm-lpss.c
+++ b/drivers/pwm/pwm-lpss.c
@@ -57,7 +57,7 @@ static inline void pwm_lpss_write(const struct pwm_device *pwm, u32 value)
 	writel(value, lpwm->regs + pwm->hwpwm * PWM_SIZE + PWM);
 }
 
-static int pwm_lpss_update(struct pwm_device *pwm)
+static int pwm_lpss_wait_for_update(struct pwm_device *pwm)
 {
 	struct pwm_lpss_chip *lpwm = to_lpwm(pwm->chip);
 	const void __iomem *addr = lpwm->regs + pwm->hwpwm * PWM_SIZE + PWM;
@@ -65,8 +65,6 @@ static int pwm_lpss_update(struct pwm_device *pwm)
 	u32 val;
 	int err;
 
-	pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE);
-
 	/*
 	 * PWM Configuration register has SW_UPDATE bit that is set when a new
 	 * configuration is written to the register. The bit is automatically
@@ -122,6 +120,12 @@ static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm,
 	pwm_lpss_write(pwm, ctrl);
 }
 
+static inline void pwm_lpss_cond_enable(struct pwm_device *pwm, bool cond)
+{
+	if (cond)
+		pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_ENABLE);
+}
+
 static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 			  struct pwm_state *state)
 {
@@ -137,18 +141,21 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 				return ret;
 			}
 			pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period);
-			ret = pwm_lpss_update(pwm);
+			pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE);
+			pwm_lpss_cond_enable(pwm, lpwm->info->bypass == false);
+			ret = pwm_lpss_wait_for_update(pwm);
 			if (ret) {
 				pm_runtime_put(chip->dev);
 				return ret;
 			}
-			pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_ENABLE);
+			pwm_lpss_cond_enable(pwm, lpwm->info->bypass == true);
 		} else {
 			ret = pwm_lpss_is_updating(pwm);
 			if (ret)
 				return ret;
 			pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period);
-			return pwm_lpss_update(pwm);
+			pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE);
+			return pwm_lpss_wait_for_update(pwm);
 		}
 	} else if (pwm_is_enabled(pwm)) {
 		pwm_lpss_write(pwm, pwm_lpss_read(pwm) & ~PWM_ENABLE);
diff --git a/drivers/pwm/pwm-lpss.h b/drivers/pwm/pwm-lpss.h
index c94cd7c2695d..98306bb02cfe 100644
--- a/drivers/pwm/pwm-lpss.h
+++ b/drivers/pwm/pwm-lpss.h
@@ -22,6 +22,7 @@ struct pwm_lpss_boardinfo {
 	unsigned long clk_rate;
 	unsigned int npwm;
 	unsigned long base_unit_bits;
+	bool bypass;
 };
 
 struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r,

From fe1a83b43869790501ed82b58dc8229023c69f74 Mon Sep 17 00:00:00 2001
From: Xiaolei Yu <dreifachstein@gmail.com>
Date: Thu, 30 Mar 2017 19:43:09 +0800
Subject: [PATCH 254/410] HID: uclogic: add support for Ugee Tablet EX07S

This device has a different vendor id but responds to initialization.

Signed-off-by: Xiaolei Yu <dreifachstein@gmail.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c    | 1 +
 drivers/hid/hid-ids.h     | 3 +++
 drivers/hid/hid-uclogic.c | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 63ec1993eaaa..86bbd8a1fd4a 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -2096,6 +2096,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UGEE_TABLET_45) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_DRAWIMAGE_G3) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_EX07S) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SMARTJOY_PLUS) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_SUPER_JOY_BOX_3) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_DUAL_USB_JOYPAD) },
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 4e2648c86c8c..b26c030926c1 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -1028,6 +1028,9 @@
 #define USB_DEVICE_ID_UGEE_TABLET_45		0x0045
 #define USB_DEVICE_ID_YIYNOVA_TABLET		0x004d
 
+#define USB_VENDOR_ID_UGEE		0x28bd
+#define USB_DEVICE_ID_UGEE_TABLET_EX07S		0x0071
+
 #define USB_VENDOR_ID_UNITEC	0x227d
 #define USB_DEVICE_ID_UNITEC_USB_TOUCH_0709	0x0709
 #define USB_DEVICE_ID_UNITEC_USB_TOUCH_0A19	0x0a19
diff --git a/drivers/hid/hid-uclogic.c b/drivers/hid/hid-uclogic.c
index 1509d7287ff3..e3e6e5c893cc 100644
--- a/drivers/hid/hid-uclogic.c
+++ b/drivers/hid/hid-uclogic.c
@@ -977,6 +977,7 @@ static int uclogic_probe(struct hid_device *hdev,
 		}
 		break;
 	case USB_DEVICE_ID_UGTIZER_TABLET_GP0610:
+	case USB_DEVICE_ID_UGEE_TABLET_EX07S:
 		/* If this is the pen interface */
 		if (intf->cur_altsetting->desc.bInterfaceNumber == 1) {
 			rc = uclogic_tablet_enable(hdev);
@@ -1069,6 +1070,7 @@ static const struct hid_device_id uclogic_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UGEE_TABLET_45) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_DRAWIMAGE_G3) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_GP0610) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_UGEE, USB_DEVICE_ID_UGEE_TABLET_EX07S) },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, uclogic_devices);

From a900152b5c29aea8134cc7a4c5db25552b3cd8f7 Mon Sep 17 00:00:00 2001
From: David Wu <david.wu@rock-chips.com>
Date: Wed, 1 Mar 2017 19:10:55 +0800
Subject: [PATCH 255/410] pwm: rockchip: State of PWM clock should synchronize
 with PWM enabled state

If the PWM was not enabled at U-Boot loader, PWM could not work for
clock always disabled at PWM driver. The PWM clock is enabled at
beginning of pwm_apply(), but disabled at end of pwm_apply().

If the PWM was enabled at U-Boot loader, PWM clock is always enabled
unless closed by ATF. The pwm-backlight might turn off the power at
early suspend, should disable PWM clock for saving power consume.

It is important to provide opportunity to enable/disable clock at PWM
driver, the PWM consumer should ensure correct order to call PWM enable
and disable, and PWM driver ensure state of PWM clock synchronized with
PWM enabled state.

Fixes: 2bf1c98aa5a4 ("pwm: rockchip: Add support for atomic update")
Cc: stable@vger.kernel.org
Signed-off-by: David Wu <david.wu@rock-chips.com>
Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-rockchip.c | 40 +++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 7 deletions(-)

diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c
index ef89df1f7336..744d56197286 100644
--- a/drivers/pwm/pwm-rockchip.c
+++ b/drivers/pwm/pwm-rockchip.c
@@ -191,6 +191,28 @@ static int rockchip_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 	return 0;
 }
 
+static int rockchip_pwm_enable(struct pwm_chip *chip,
+			 struct pwm_device *pwm,
+			 bool enable,
+			 enum pwm_polarity polarity)
+{
+	struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip);
+	int ret;
+
+	if (enable) {
+		ret = clk_enable(pc->clk);
+		if (ret)
+			return ret;
+	}
+
+	pc->data->set_enable(chip, pwm, enable, polarity);
+
+	if (!enable)
+		clk_disable(pc->clk);
+
+	return 0;
+}
+
 static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 			      struct pwm_state *state)
 {
@@ -207,22 +229,26 @@ static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 		return ret;
 
 	if (state->polarity != curstate.polarity && enabled) {
-		pc->data->set_enable(chip, pwm, false, state->polarity);
+		ret = rockchip_pwm_enable(chip, pwm, false, state->polarity);
+		if (ret)
+			goto out;
 		enabled = false;
 	}
 
 	ret = rockchip_pwm_config(chip, pwm, state->duty_cycle, state->period);
 	if (ret) {
 		if (enabled != curstate.enabled)
-			pc->data->set_enable(chip, pwm, !enabled,
-					     state->polarity);
-
+			rockchip_pwm_enable(chip, pwm, !enabled,
+				      state->polarity);
 		goto out;
 	}
 
-	if (state->enabled != enabled)
-		pc->data->set_enable(chip, pwm, state->enabled,
-				     state->polarity);
+	if (state->enabled != enabled) {
+		ret = rockchip_pwm_enable(chip, pwm, state->enabled,
+				    state->polarity);
+		if (ret)
+			goto out;
+	}
 
 	/*
 	 * Update the state with the real hardware, which can differ a bit

From 9ae34dbd8afd790cb5f52467e4f816434379eafa Mon Sep 17 00:00:00 2001
From: Tom Hromatka <tom.hromatka@oracle.com>
Date: Fri, 31 Mar 2017 16:31:42 -0600
Subject: [PATCH 256/410] sparc64: Fix kernel panic due to erroneous #ifdef
 surrounding pmd_write()

This commit moves sparc64's prototype of pmd_write() outside
of the CONFIG_TRANSPARENT_HUGEPAGE ifdef.

In 2013, commit a7b9403f0e6d ("sparc64: Encode huge PMDs using PTE
encoding.") exposed a path where pmd_write() could be called without
CONFIG_TRANSPARENT_HUGEPAGE defined.  This can result in the panic below.

The diff is awkward to read, but the changes are straightforward.
pmd_write() was moved outside of #ifdef CONFIG_TRANSPARENT_HUGEPAGE.
Also, __HAVE_ARCH_PMD_WRITE was defined.

kernel BUG at include/asm-generic/pgtable.h:576!
              \|/ ____ \|/
              "@'/ .. \`@"
              /_| \__/ |_\
                 \__U_/
oracle_8114_cdb(8114): Kernel bad sw trap 5 [#1]
CPU: 120 PID: 8114 Comm: oracle_8114_cdb Not tainted
4.1.12-61.7.1.el6uek.rc1.sparc64 #1
task: fff8400700a24d60 ti: fff8400700bc4000 task.ti: fff8400700bc4000
TSTATE: 0000004411e01607 TPC: 00000000004609f8 TNPC: 00000000004609fc Y:
00000005    Not tainted
TPC: <gup_huge_pmd+0x198/0x1e0>
g0: 000000000001c000 g1: 0000000000ef3954 g2: 0000000000000000 g3: 0000000000000001
g4: fff8400700a24d60 g5: fff8001fa5c10000 g6: fff8400700bc4000 g7: 0000000000000720
o0: 0000000000bc5058 o1: 0000000000000240 o2: 0000000000006000 o3: 0000000000001c00
o4: 0000000000000000 o5: 0000048000080000 sp: fff8400700bc6ab1 ret_pc: 00000000004609f0
RPC: <gup_huge_pmd+0x190/0x1e0>
l0: fff8400700bc74fc l1: 0000000000020000 l2: 0000000000002000 l3: 0000000000000000
l4: fff8001f93250950 l5: 000000000113f800 l6: 0000000000000004 l7: 0000000000000000
i0: fff8400700ca46a0 i1: bd0000085e800453 i2: 000000026a0c4000 i3: 000000026a0c6000
i4: 0000000000000001 i5: fff800070c958de8 i6: fff8400700bc6b61 i7: 0000000000460dd0
I7: <gup_pud_range+0x170/0x1a0>
Call Trace:
 [0000000000460dd0] gup_pud_range+0x170/0x1a0
 [0000000000460e84] get_user_pages_fast+0x84/0x120
 [00000000006f5a18] iov_iter_get_pages+0x98/0x240
 [00000000005fa744] do_direct_IO+0xf64/0x1e00
 [00000000005fbbc0] __blockdev_direct_IO+0x360/0x15a0
 [00000000101f74fc] ext4_ind_direct_IO+0xdc/0x400 [ext4]
 [00000000101af690] ext4_ext_direct_IO+0x1d0/0x2c0 [ext4]
 [00000000101af86c] ext4_direct_IO+0xec/0x220 [ext4]
 [0000000000553bd4] generic_file_read_iter+0x114/0x140
 [00000000005bdc2c] __vfs_read+0xac/0x100
 [00000000005bf254] vfs_read+0x54/0x100
 [00000000005bf368] SyS_pread64+0x68/0x80

Signed-off-by: Tom Hromatka <tom.hromatka@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/pgtable_64.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 8a598528ec1f..6fbd931f0570 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -679,6 +679,14 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
 	return pte_pfn(pte);
 }
 
+#define __HAVE_ARCH_PMD_WRITE
+static inline unsigned long pmd_write(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	return pte_write(pte);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline unsigned long pmd_dirty(pmd_t pmd)
 {
@@ -694,13 +702,6 @@ static inline unsigned long pmd_young(pmd_t pmd)
 	return pte_young(pte);
 }
 
-static inline unsigned long pmd_write(pmd_t pmd)
-{
-	pte_t pte = __pte(pmd_val(pmd));
-
-	return pte_write(pte);
-}
-
 static inline unsigned long pmd_trans_huge(pmd_t pmd)
 {
 	pte_t pte = __pte(pmd_val(pmd));

From 76811263b3fa6347699a446cddeb63badf3e6095 Mon Sep 17 00:00:00 2001
From: Nitin Gupta <nitin.m.gupta@oracle.com>
Date: Fri, 31 Mar 2017 15:48:53 -0700
Subject: [PATCH 257/410] sparc64: Fix memory corruption when THP is enabled

The memory corruption was happening due to incorrect
TLB/TSB flushing of hugepages.

Reported-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/mm/tlb.c | 6 +++---
 arch/sparc/mm/tsb.c | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index afda3bbf7854..ee8066c3d96c 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -154,7 +154,7 @@ static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
 		if (pte_val(*pte) & _PAGE_VALID) {
 			bool exec = pte_exec(*pte);
 
-			tlb_batch_add_one(mm, vaddr, exec, false);
+			tlb_batch_add_one(mm, vaddr, exec, PAGE_SHIFT);
 		}
 		pte++;
 		vaddr += PAGE_SIZE;
@@ -209,9 +209,9 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 			pte_t orig_pte = __pte(pmd_val(orig));
 			bool exec = pte_exec(orig_pte);
 
-			tlb_batch_add_one(mm, addr, exec, true);
+			tlb_batch_add_one(mm, addr, exec, REAL_HPAGE_SHIFT);
 			tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec,
-					true);
+					  REAL_HPAGE_SHIFT);
 		} else {
 			tlb_batch_pmd_scan(mm, addr, orig);
 		}
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index 0a04811f06b7..bedf08b22a47 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -122,7 +122,7 @@ void flush_tsb_user(struct tlb_batch *tb)
 
 	spin_lock_irqsave(&mm->context.lock, flags);
 
-	if (tb->hugepage_shift < HPAGE_SHIFT) {
+	if (tb->hugepage_shift < REAL_HPAGE_SHIFT) {
 		base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
 		nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
 		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
@@ -155,7 +155,7 @@ void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr,
 
 	spin_lock_irqsave(&mm->context.lock, flags);
 
-	if (hugepage_shift < HPAGE_SHIFT) {
+	if (hugepage_shift < REAL_HPAGE_SHIFT) {
 		base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
 		nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
 		if (tlb_type == cheetah_plus || tlb_type == hypervisor)

From 9d262d95114cf2e2ac5e0ff358347fa2e214eda5 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sat, 1 Apr 2017 13:47:44 -0700
Subject: [PATCH 258/410] sparc32: Export vac_cache_size to fix build error

sparc32:allmodconfig fails to build with the following error.

ERROR: "vac_cache_size" [drivers/infiniband/sw/rxe/rdma_rxe.ko] undefined!

Fixes: cb8864559631 ("infiniband: Fix alignment of mmap cookies ...")
Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Doug Ledford <dledford@redhat.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/mm/srmmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index def82f6d626f..8e76ebba2986 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -54,6 +54,7 @@
 enum mbus_module srmmu_modtype;
 static unsigned int hwbug_bitmask;
 int vac_cache_size;
+EXPORT_SYMBOL(vac_cache_size);
 int vac_line_size;
 
 extern struct resource sparc_iomap;

From 86e1066fe2af51dce4351c2357223e0d902dfc7a Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Wed, 5 Apr 2017 21:24:20 +0200
Subject: [PATCH 259/410] sparc: remove unused wp_works_ok macro

It's unused for ages, used to be required for ksyms.c back in the v1.1
times.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/processor_32.h | 6 ------
 arch/sparc/include/asm/processor_64.h | 4 ----
 2 files changed, 10 deletions(-)

diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index 365d4cb267b4..dd27159819eb 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -18,12 +18,6 @@
 #include <asm/signal.h>
 #include <asm/page.h>
 
-/*
- * The sparc has no problems with write protection
- */
-#define wp_works_ok 1
-#define wp_works_ok__is_a_macro /* for versions in ksyms.c */
-
 /* Whee, this is STACK_TOP + PAGE_SIZE and the lowest kernel address too...
  * That one page is used to protect kernel from intruders, so that
  * we can make our access_ok test faster
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
index 6448cfc8292f..b58ee9018433 100644
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -18,10 +18,6 @@
 #include <asm/ptrace.h>
 #include <asm/page.h>
 
-/* The sparc has no problems with write protection */
-#define wp_works_ok 1
-#define wp_works_ok__is_a_macro /* for versions in ksyms.c */
-
 /*
  * User lives in his very own context, and cannot reference us. Note
  * that TASK_SIZE is a misnomer, it really gives maximum user virtual

From a8801799c6975601fd58ae62f48964caec2eb83f Mon Sep 17 00:00:00 2001
From: Florian Larysch <fl@n621.de>
Date: Mon, 3 Apr 2017 16:46:09 +0200
Subject: [PATCH 260/410] net: ipv4: fix multipath RTM_GETROUTE behavior when
 iif is given

inet_rtm_getroute synthesizes a skeletal ICMP skb, which is passed to
ip_route_input when iif is given. If a multipath route is present for
the designated destination, ip_multipath_icmp_hash ends up being called,
which uses the source/destination addresses within the skb to calculate
a hash. However, those are not set in the synthetic skb, causing it to
return an arbitrary and incorrect result.

Instead, use UDP, which gets no such special treatment.

Signed-off-by: Florian Larysch <fl@n621.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8471dd116771..acd69cfe2951 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2620,7 +2620,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
 	skb_reset_network_header(skb);
 
 	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
-	ip_hdr(skb)->protocol = IPPROTO_ICMP;
+	ip_hdr(skb)->protocol = IPPROTO_UDP;
 	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
 
 	src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;

From 92f9170621a13e4ba4eda6dfb070318d5405a0cf Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Tue, 4 Apr 2017 18:51:30 -0700
Subject: [PATCH 261/410] net_sched: check noop_qdisc before qdisc_hash_add()

Dmitry reported a crash when injecting faults in
attach_one_default_qdisc() and dev->qdisc is still
a noop_disc, the check before qdisc_hash_add() fails
to catch it because it tests NULL. We should test
against noop_qdisc since it is the default qdisc
at this point.

Fixes: 59cc1f61f09c ("net: sched: convert qdisc linked list to hashtable")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index b052b27a984e..1a2f9e964330 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -794,7 +794,7 @@ static void attach_default_qdiscs(struct net_device *dev)
 		}
 	}
 #ifdef CONFIG_NET_SCHED
-	if (dev->qdisc)
+	if (dev->qdisc != &noop_qdisc)
 		qdisc_hash_add(dev->qdisc);
 #endif
 }

From 6c22fce07c97f765af1808ec3be007847e0b47d1 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Wed, 5 Apr 2017 14:14:39 +0200
Subject: [PATCH 262/410] usbnet: make sure no NULL pointer is passed through

Coverity reports:

** CID 751368:  Null pointer dereferences  (FORWARD_NULL)
/drivers/net/usb/usbnet.c: 1925 in __usbnet_read_cmd()

________________________________________________________________________________________________________
*** CID 751368:  Null pointer dereferences  (FORWARD_NULL)
/drivers/net/usb/usbnet.c: 1925 in __usbnet_read_cmd()
1919     EXPORT_SYMBOL(usbnet_link_change);
1920
1921     /*-------------------------------------------------------------------------*/
1922     static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
1923                                 u16 value, u16 index, void *data, u16 size)
1924     {
>>>     CID 751368:  Null pointer dereferences  (FORWARD_NULL)
>>>     Assigning: "buf" = "NULL".
1925            void *buf = NULL;
1926            int err = -ENOMEM;
1927
1928            netdev_dbg(dev->net, "usbnet_read_cmd cmd=0x%02x reqtype=%02x"
1929                       " value=0x%04x index=0x%04x size=%d\n",
1930                       cmd, reqtype, value, index, size);

** CID 751370:  Null pointer dereferences  (FORWARD_NULL)
/drivers/net/usb/usbnet.c: 1952 in __usbnet_write_cmd()

________________________________________________________________________________________________________
*** CID 751370:  Null pointer dereferences  (FORWARD_NULL)
/drivers/net/usb/usbnet.c: 1952 in __usbnet_write_cmd()
1946     }
1947
1948     static int __usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
1949                                  u16 value, u16 index, const void *data,
1950                                  u16 size)
1951     {
>>>     CID 751370:  Null pointer dereferences  (FORWARD_NULL)
>>>     Assigning: "buf" = "NULL".
1952            void *buf = NULL;
1953            int err = -ENOMEM;
1954
1955            netdev_dbg(dev->net, "usbnet_write_cmd cmd=0x%02x reqtype=%02x"
1956                       " value=0x%04x index=0x%04x size=%d\n",
1957                       cmd, reqtype, value, index, size);

** CID 1325026:  Null pointer dereferences  (FORWARD_NULL)
/drivers/net/usb/ch9200.c: 143 in control_write()

It is valid to offer commands without a buffer, but then you need a size
of zero. This should actually be checked.

Signed-off-by: Oliver Neukum <oneukum@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 3de65ea6531a..453244805c52 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1929,7 +1929,7 @@ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
 		   " value=0x%04x index=0x%04x size=%d\n",
 		   cmd, reqtype, value, index, size);
 
-	if (data) {
+	if (size) {
 		buf = kmalloc(size, GFP_KERNEL);
 		if (!buf)
 			goto out;
@@ -1938,8 +1938,13 @@ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
 	err = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0),
 			      cmd, reqtype, value, index, buf, size,
 			      USB_CTRL_GET_TIMEOUT);
-	if (err > 0 && err <= size)
-		memcpy(data, buf, err);
+	if (err > 0 && err <= size) {
+        if (data)
+            memcpy(data, buf, err);
+        else
+            netdev_dbg(dev->net,
+                "Huh? Data requested but thrown away.\n");
+    }
 	kfree(buf);
 out:
 	return err;
@@ -1960,7 +1965,13 @@ static int __usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype,
 		buf = kmemdup(data, size, GFP_KERNEL);
 		if (!buf)
 			goto out;
-	}
+	} else {
+        if (size) {
+            WARN_ON_ONCE(1);
+            err = -EINVAL;
+            goto out;
+        }
+    }
 
 	err = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
 			      cmd, reqtype, value, index, buf, size,

From 34b2789f1d9bf8dcca9b5cb553d076ca2cd898ee Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 6 Apr 2017 13:10:52 +0800
Subject: [PATCH 263/410] sctp: listen on the sock only when it's state is
 listening or closed

Now sctp doesn't check sock's state before listening on it. It could
even cause changing a sock with any state to become a listening sock
when doing sctp_listen.

This patch is to fix it by checking sock's state in sctp_listen, so
that it will listen on the sock with right state.

Reported-by: Andrey Konovalov <andreyknvl@google.com>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index c1401f43d40f..d9d4c92e06b3 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -7034,6 +7034,9 @@ int sctp_inet_listen(struct socket *sock, int backlog)
 	if (sock->state != SS_UNCONNECTED)
 		goto out;
 
+	if (!sctp_sstate(sk, LISTENING) && !sctp_sstate(sk, CLOSED))
+		goto out;
+
 	/* If backlog is zero, disable listening. */
 	if (!backlog) {
 		if (sctp_sstate(sk, CLOSED))

From 16cf72bb085626ae1323e59985d6cbb58a8f71d8 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 6 Apr 2017 13:41:28 +0800
Subject: [PATCH 264/410] team: call netdev_change_features out of team lock

Commit f6988cb63a4e ("team: don't call netdev_change_features under
team->lock") fixed the issue calling netdev_change_features under
team->lock for team_compute_features.

But there are still two places where it calls netdev_change_features
under team->lock, team_port_add and team_port_del. It may cause a
dead lock when the slave port with LRO enabled is added.

This patch is to fix this dead lock by moving netdev_change_features
out of team_port_add and team_port_del, and call it after unlocking
the team lock.

Reported-by: Patrick Talbert <ptalbert@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 1b52520715ae..f8c81f12d988 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -990,7 +990,7 @@ static void team_port_disable(struct team *team,
 #define TEAM_ENC_FEATURES	(NETIF_F_HW_CSUM | NETIF_F_SG | \
 				 NETIF_F_RXCSUM | NETIF_F_ALL_TSO)
 
-static void ___team_compute_features(struct team *team)
+static void __team_compute_features(struct team *team)
 {
 	struct team_port *port;
 	u32 vlan_features = TEAM_VLAN_FEATURES & NETIF_F_ALL_FOR_ALL;
@@ -1023,16 +1023,10 @@ static void ___team_compute_features(struct team *team)
 		team->dev->priv_flags |= IFF_XMIT_DST_RELEASE;
 }
 
-static void __team_compute_features(struct team *team)
-{
-	___team_compute_features(team);
-	netdev_change_features(team->dev);
-}
-
 static void team_compute_features(struct team *team)
 {
 	mutex_lock(&team->lock);
-	___team_compute_features(team);
+	__team_compute_features(team);
 	mutex_unlock(&team->lock);
 	netdev_change_features(team->dev);
 }
@@ -1641,6 +1635,7 @@ static void team_uninit(struct net_device *dev)
 	team_notify_peers_fini(team);
 	team_queue_override_fini(team);
 	mutex_unlock(&team->lock);
+	netdev_change_features(dev);
 }
 
 static void team_destructor(struct net_device *dev)
@@ -1928,6 +1923,10 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev)
 	mutex_lock(&team->lock);
 	err = team_port_add(team, port_dev);
 	mutex_unlock(&team->lock);
+
+	if (!err)
+		netdev_change_features(dev);
+
 	return err;
 }
 
@@ -1939,6 +1938,10 @@ static int team_del_slave(struct net_device *dev, struct net_device *port_dev)
 	mutex_lock(&team->lock);
 	err = team_port_del(team, port_dev);
 	mutex_unlock(&team->lock);
+
+	if (!err)
+		netdev_change_features(dev);
+
 	return err;
 }
 

From 6118714275f0a313ecc296a87ed1af32d9691bed Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Thu, 30 Mar 2017 09:16:39 -0700
Subject: [PATCH 265/410] pinctrl: core: Fix pinctrl_register_and_init() with
 pinctrl_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Recent pinctrl changes to allow dynamic allocation of pins exposed one
more issue with the pinctrl pins claimed early by the controller itself.
This caused a regression for IMX6 pinctrl hogs.

Before enabling the pin controller driver we need to wait until it has
been properly initialized, then claim the hogs, and only then enable it.

To fix the regression, split the code into pinctrl_claim_hogs() and
pinctrl_enable(). And then let's require that pinctrl_enable() is always
called by the pin controller driver when ready after calling
pinctrl_register_and_init().

Depends-on: 950b0d91dc10 ("pinctrl: core: Fix regression caused by delayed
work for hogs")
Fixes: df61b366af26 ("pinctrl: core: Use delayed work for hogs")
Fixes: e566fc11ea76 ("pinctrl: imx: use generic pinctrl helpers for
managing groups")
Cc: Haojian Zhuang <haojian.zhuang@linaro.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Mika Penttilä <mika.penttila@nextfour.com>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Nishanth Menon <nm@ti.com>
Cc: Shawn Guo <shawnguo@kernel.org>
Cc: Stefan Agner <stefan@agner.ch>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Tested-by: Gary Bisson <gary.bisson@boundarydevices.com>
Tested-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/pinctrl.txt               |  8 +-
 drivers/pinctrl/core.c                  | 97 ++++++++++++++++---------
 drivers/pinctrl/freescale/pinctrl-imx.c |  2 +-
 drivers/pinctrl/pinctrl-single.c        |  2 +-
 drivers/pinctrl/sh-pfc/pinctrl.c        | 11 ++-
 drivers/pinctrl/ti/pinctrl-ti-iodelay.c |  2 +
 include/linux/pinctrl/pinctrl.h         |  3 +-
 7 files changed, 83 insertions(+), 42 deletions(-)

diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt
index 54bd5faa8782..f2af35f6d6b2 100644
--- a/Documentation/pinctrl.txt
+++ b/Documentation/pinctrl.txt
@@ -77,9 +77,15 @@ static struct pinctrl_desc foo_desc = {
 
 int __init foo_probe(void)
 {
+	int error;
+
 	struct pinctrl_dev *pctl;
 
-	return pinctrl_register_and_init(&foo_desc, <PARENT>, NULL, &pctl);
+	error = pinctrl_register_and_init(&foo_desc, <PARENT>, NULL, &pctl);
+	if (error)
+		return error;
+
+	return pinctrl_enable(pctl);
 }
 
 To enable the pinctrl subsystem and the subgroups for PINMUX and PINCONF and
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index d69046537b75..32822b0d9cd0 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -2010,29 +2010,57 @@ out_err:
 	return ERR_PTR(ret);
 }
 
-static int pinctrl_create_and_start(struct pinctrl_dev *pctldev)
+static int pinctrl_claim_hogs(struct pinctrl_dev *pctldev)
 {
 	pctldev->p = create_pinctrl(pctldev->dev, pctldev);
-	if (!IS_ERR(pctldev->p)) {
-		kref_get(&pctldev->p->users);
-		pctldev->hog_default =
-			pinctrl_lookup_state(pctldev->p, PINCTRL_STATE_DEFAULT);
-		if (IS_ERR(pctldev->hog_default)) {
-			dev_dbg(pctldev->dev,
-				"failed to lookup the default state\n");
-		} else {
-			if (pinctrl_select_state(pctldev->p,
-						pctldev->hog_default))
-				dev_err(pctldev->dev,
-					"failed to select default state\n");
-		}
+	if (PTR_ERR(pctldev->p) == -ENODEV) {
+		dev_dbg(pctldev->dev, "no hogs found\n");
 
-		pctldev->hog_sleep =
-			pinctrl_lookup_state(pctldev->p,
-						    PINCTRL_STATE_SLEEP);
-		if (IS_ERR(pctldev->hog_sleep))
-			dev_dbg(pctldev->dev,
-				"failed to lookup the sleep state\n");
+		return 0;
+	}
+
+	if (IS_ERR(pctldev->p)) {
+		dev_err(pctldev->dev, "error claiming hogs: %li\n",
+			PTR_ERR(pctldev->p));
+
+		return PTR_ERR(pctldev->p);
+	}
+
+	kref_get(&pctldev->p->users);
+	pctldev->hog_default =
+		pinctrl_lookup_state(pctldev->p, PINCTRL_STATE_DEFAULT);
+	if (IS_ERR(pctldev->hog_default)) {
+		dev_dbg(pctldev->dev,
+			"failed to lookup the default state\n");
+	} else {
+		if (pinctrl_select_state(pctldev->p,
+					 pctldev->hog_default))
+			dev_err(pctldev->dev,
+				"failed to select default state\n");
+	}
+
+	pctldev->hog_sleep =
+		pinctrl_lookup_state(pctldev->p,
+				     PINCTRL_STATE_SLEEP);
+	if (IS_ERR(pctldev->hog_sleep))
+		dev_dbg(pctldev->dev,
+			"failed to lookup the sleep state\n");
+
+	return 0;
+}
+
+int pinctrl_enable(struct pinctrl_dev *pctldev)
+{
+	int error;
+
+	error = pinctrl_claim_hogs(pctldev);
+	if (error) {
+		dev_err(pctldev->dev, "could not claim hogs: %i\n",
+			error);
+		mutex_destroy(&pctldev->mutex);
+		kfree(pctldev);
+
+		return error;
 	}
 
 	mutex_lock(&pinctrldev_list_mutex);
@@ -2043,6 +2071,7 @@ static int pinctrl_create_and_start(struct pinctrl_dev *pctldev)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(pinctrl_enable);
 
 /**
  * pinctrl_register() - register a pin controller device
@@ -2065,25 +2094,30 @@ struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc,
 	if (IS_ERR(pctldev))
 		return pctldev;
 
-	error = pinctrl_create_and_start(pctldev);
-	if (error) {
-		mutex_destroy(&pctldev->mutex);
-		kfree(pctldev);
-
+	error = pinctrl_enable(pctldev);
+	if (error)
 		return ERR_PTR(error);
-	}
 
 	return pctldev;
 
 }
 EXPORT_SYMBOL_GPL(pinctrl_register);
 
+/**
+ * pinctrl_register_and_init() - register and init pin controller device
+ * @pctldesc: descriptor for this pin controller
+ * @dev: parent device for this pin controller
+ * @driver_data: private pin controller data for this pin controller
+ * @pctldev: pin controller device
+ *
+ * Note that pinctrl_enable() still needs to be manually called after
+ * this once the driver is ready.
+ */
 int pinctrl_register_and_init(struct pinctrl_desc *pctldesc,
 			      struct device *dev, void *driver_data,
 			      struct pinctrl_dev **pctldev)
 {
 	struct pinctrl_dev *p;
-	int error;
 
 	p = pinctrl_init_controller(pctldesc, dev, driver_data);
 	if (IS_ERR(p))
@@ -2097,15 +2131,6 @@ int pinctrl_register_and_init(struct pinctrl_desc *pctldesc,
 	 */
 	*pctldev = p;
 
-	error = pinctrl_create_and_start(p);
-	if (error) {
-		mutex_destroy(&p->mutex);
-		kfree(p);
-		*pctldev = NULL;
-
-		return error;
-	}
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(pinctrl_register_and_init);
diff --git a/drivers/pinctrl/freescale/pinctrl-imx.c b/drivers/pinctrl/freescale/pinctrl-imx.c
index a7ace9e1ad81..74bd90dfd7b1 100644
--- a/drivers/pinctrl/freescale/pinctrl-imx.c
+++ b/drivers/pinctrl/freescale/pinctrl-imx.c
@@ -790,7 +790,7 @@ int imx_pinctrl_probe(struct platform_device *pdev,
 
 	dev_info(&pdev->dev, "initialized IMX pinctrl driver\n");
 
-	return 0;
+	return pinctrl_enable(ipctl->pctl);
 
 free:
 	imx_free_resources(ipctl);
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index 8b2d45e85bae..9c267dcda094 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -1781,7 +1781,7 @@ static int pcs_probe(struct platform_device *pdev)
 	dev_info(pcs->dev, "%i pins at pa %p size %u\n",
 		 pcs->desc.npins, pcs->base, pcs->size);
 
-	return 0;
+	return pinctrl_enable(pcs->pctl);
 
 free:
 	pcs_free_resources(pcs);
diff --git a/drivers/pinctrl/sh-pfc/pinctrl.c b/drivers/pinctrl/sh-pfc/pinctrl.c
index 08150a321be6..a70157f0acf4 100644
--- a/drivers/pinctrl/sh-pfc/pinctrl.c
+++ b/drivers/pinctrl/sh-pfc/pinctrl.c
@@ -816,6 +816,13 @@ int sh_pfc_register_pinctrl(struct sh_pfc *pfc)
 	pmx->pctl_desc.pins = pmx->pins;
 	pmx->pctl_desc.npins = pfc->info->nr_pins;
 
-	return devm_pinctrl_register_and_init(pfc->dev, &pmx->pctl_desc, pmx,
-					      &pmx->pctl);
+	ret = devm_pinctrl_register_and_init(pfc->dev, &pmx->pctl_desc, pmx,
+					     &pmx->pctl);
+	if (ret) {
+		dev_err(pfc->dev, "could not register: %i\n", ret);
+
+		return ret;
+	}
+
+	return pinctrl_enable(pmx->pctl);
 }
diff --git a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c
index 717e3404900c..362c50918c13 100644
--- a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c
+++ b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c
@@ -893,6 +893,8 @@ static int ti_iodelay_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, iod);
 
+	return pinctrl_enable(iod->pctl);
+
 exit_out:
 	of_node_put(np);
 	return ret;
diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h
index 8ce2d87a238b..5e45385c5bdc 100644
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -145,8 +145,9 @@ struct pinctrl_desc {
 extern int pinctrl_register_and_init(struct pinctrl_desc *pctldesc,
 				     struct device *dev, void *driver_data,
 				     struct pinctrl_dev **pctldev);
+extern int pinctrl_enable(struct pinctrl_dev *pctldev);
 
-/* Please use pinctrl_register_and_init() instead */
+/* Please use pinctrl_register_and_init() and pinctrl_enable() instead */
 extern struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc,
 				struct device *dev, void *driver_data);
 

From 4749228f022893faf54a3dbc70796f78b7d4f342 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 6 Apr 2017 23:34:38 +1000
Subject: [PATCH 266/410] powerpc/crypto/crc32c-vpmsum: Fix missing
 preempt_disable()

In crc32c_vpmsum() we call enable_kernel_altivec() without first
disabling preemption, which is not allowed:

  WARNING: CPU: 9 PID: 2949 at ../arch/powerpc/kernel/process.c:277 enable_kernel_altivec+0x100/0x120
  Modules linked in: dm_thin_pool dm_persistent_data dm_bio_prison dm_bufio libcrc32c vmx_crypto ...
  CPU: 9 PID: 2949 Comm: docker Not tainted 4.11.0-rc5-compiler_gcc-6.3.1-00033-g308ac7563944 #381
  ...
  NIP [c00000000001e320] enable_kernel_altivec+0x100/0x120
  LR [d000000003df0910] crc32c_vpmsum+0x108/0x150 [crc32c_vpmsum]
  Call Trace:
    0xc138fd09 (unreliable)
    crc32c_vpmsum+0x108/0x150 [crc32c_vpmsum]
    crc32c_vpmsum_update+0x3c/0x60 [crc32c_vpmsum]
    crypto_shash_update+0x88/0x1c0
    crc32c+0x64/0x90 [libcrc32c]
    dm_bm_checksum+0x48/0x80 [dm_persistent_data]
    sb_check+0x84/0x120 [dm_thin_pool]
    dm_bm_validate_buffer.isra.0+0xc0/0x1b0 [dm_persistent_data]
    dm_bm_read_lock+0x80/0xf0 [dm_persistent_data]
    __create_persistent_data_objects+0x16c/0x810 [dm_thin_pool]
    dm_pool_metadata_open+0xb0/0x1a0 [dm_thin_pool]
    pool_ctr+0x4cc/0xb60 [dm_thin_pool]
    dm_table_add_target+0x16c/0x3c0
    table_load+0x184/0x400
    ctl_ioctl+0x2f0/0x560
    dm_ctl_ioctl+0x38/0x50
    do_vfs_ioctl+0xd8/0x920
    SyS_ioctl+0x68/0xc0
    system_call+0x38/0xfc

It used to be sufficient just to call pagefault_disable(), because that
also disabled preemption. But the two were decoupled in commit 8222dbe21e79
("sched/preempt, mm/fault: Decouple preemption from the page fault
logic") in mid 2015.

So add the missing preempt_disable/enable(). We should also call
disable_kernel_fp(), although it does nothing by default, there is a
debug switch to make it active and all enables should be paired with
disables.

Fixes: 6dd7a82cc54e ("crypto: powerpc - Add POWER8 optimised crc32c")
Cc: stable@vger.kernel.org # v4.8+
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/crypto/crc32c-vpmsum_glue.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
index 411994551afc..f058e0c3e4d4 100644
--- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c
+++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
@@ -33,10 +33,13 @@ static u32 crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len)
 	}
 
 	if (len & ~VMX_ALIGN_MASK) {
+		preempt_disable();
 		pagefault_disable();
 		enable_kernel_altivec();
 		crc = __crc32c_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
+		disable_kernel_altivec();
 		pagefault_enable();
+		preempt_enable();
 	}
 
 	tail = len & VMX_ALIGN_MASK;

From 6ae979ab39a368c18ceb0424bf824d172d6ab56f Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 31 Mar 2017 12:23:43 +0100
Subject: [PATCH 267/410] Revert "Revert "arm64: hugetlb: partial revert of
 66b3923a1a0f""

The use of the contiguous bit by our hugetlb implementation violates
the break-before-make requirements of the architecture and can lead to
silent data corruption or TLB conflict aborts. Once again, disable these
hugetlb sizes whilst it gets worked out.

This reverts commit ab2e1b89230fa80328262c91d2d0a539a2790d6f.

Conflicts:
	arch/arm64/mm/hugetlbpage.c

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/mm/hugetlbpage.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index e25584d72396..7514a000e361 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -294,10 +294,6 @@ static __init int setup_hugepagesz(char *opt)
 		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
 	} else if (ps == PUD_SIZE) {
 		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
-	} else if (ps == (PAGE_SIZE * CONT_PTES)) {
-		hugetlb_add_hstate(CONT_PTE_SHIFT);
-	} else if (ps == (PMD_SIZE * CONT_PMDS)) {
-		hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT);
 	} else {
 		hugetlb_bad_size();
 		pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
@@ -306,13 +302,3 @@ static __init int setup_hugepagesz(char *opt)
 	return 1;
 }
 __setup("hugepagesz=", setup_hugepagesz);
-
-#ifdef CONFIG_ARM64_64K_PAGES
-static __init int add_default_hugepagesz(void)
-{
-	if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
-		hugetlb_add_hstate(CONT_PTE_SHIFT);
-	return 0;
-}
-arch_initcall(add_default_hugepagesz);
-#endif

From 38bd49064a1ecb67baad33598e3d824448ab11ec Mon Sep 17 00:00:00 2001
From: Sachin Prabhu <sprabhu@redhat.com>
Date: Fri, 3 Mar 2017 15:41:38 -0800
Subject: [PATCH 268/410] Handle mismatched open calls

A signal can interrupt a SendReceive call which result in incoming
responses to the call being ignored. This is a problem for calls such as
open which results in the successful response being ignored. This
results in an open file resource on the server.

The patch looks into responses which were cancelled after being sent and
in case of successful open closes the open fids.

For this patch, the check is only done in SendReceive2()

RH-bz: 1403319

Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
Cc: Stable <stable@vger.kernel.org>
---
 fs/cifs/cifsglob.h      | 11 ++++++++
 fs/cifs/cifsproto.h     |  3 ++-
 fs/cifs/cifssmb.c       | 11 +++++---
 fs/cifs/connect.c       | 13 +++++++--
 fs/cifs/smb2misc.c      | 46 ++++++++++++++++++++++++++++++++
 fs/cifs/smb2ops.c       |  8 ++++--
 fs/cifs/smb2proto.h     |  7 +++++
 fs/cifs/smb2transport.c | 59 ++++++++++++++++++++++++++++++++++++-----
 fs/cifs/transport.c     |  2 ++
 9 files changed, 145 insertions(+), 15 deletions(-)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index d42dd3288647..c34bdb12c8e6 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -243,6 +243,7 @@ struct smb_version_operations {
 	/* verify the message */
 	int (*check_message)(char *, unsigned int, struct TCP_Server_Info *);
 	bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
+	int (*handle_cancelled_mid)(char *, struct TCP_Server_Info *);
 	void (*downgrade_oplock)(struct TCP_Server_Info *,
 					struct cifsInodeInfo *, bool);
 	/* process transaction2 response */
@@ -1343,6 +1344,7 @@ struct mid_q_entry {
 	void *callback_data;	  /* general purpose pointer for callback */
 	void *resp_buf;		/* pointer to received SMB header */
 	int mid_state;	/* wish this were enum but can not pass to wait_event */
+	unsigned int mid_flags;
 	__le16 command;		/* smb command code */
 	bool large_buf:1;	/* if valid response, is pointer to large buf */
 	bool multiRsp:1;	/* multiple trans2 responses for one request  */
@@ -1350,6 +1352,12 @@ struct mid_q_entry {
 	bool decrypted:1;	/* decrypted entry */
 };
 
+struct close_cancelled_open {
+	struct cifs_fid         fid;
+	struct cifs_tcon        *tcon;
+	struct work_struct      work;
+};
+
 /*	Make code in transport.c a little cleaner by moving
 	update of optional stats into function below */
 #ifdef CONFIG_CIFS_STATS2
@@ -1481,6 +1489,9 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
 #define   MID_RESPONSE_MALFORMED 0x10
 #define   MID_SHUTDOWN		 0x20
 
+/* Flags */
+#define   MID_WAIT_CANCELLED	 1 /* Cancelled while waiting for response */
+
 /* Types of response buffer returned from SendReceive2 */
 #define   CIFS_NO_BUFFER        0    /* Response buffer not returned */
 #define   CIFS_SMALL_BUFFER     1
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 97e5d236d265..ec5e5e514fdd 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -79,7 +79,8 @@ extern void cifs_delete_mid(struct mid_q_entry *mid);
 extern void cifs_wake_up_task(struct mid_q_entry *mid);
 extern int cifs_handle_standard(struct TCP_Server_Info *server,
 				struct mid_q_entry *mid);
-extern int cifs_discard_remaining_data(struct TCP_Server_Info *server);
+extern int cifs_discard_remaining_data(struct TCP_Server_Info *server,
+				       char *buf);
 extern int cifs_call_async(struct TCP_Server_Info *server,
 			struct smb_rqst *rqst,
 			mid_receive_t *receive, mid_callback_t *callback,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 066950671929..967b92631807 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1400,9 +1400,9 @@ openRetry:
  * current bigbuf.
  */
 int
-cifs_discard_remaining_data(struct TCP_Server_Info *server)
+cifs_discard_remaining_data(struct TCP_Server_Info *server, char *buf)
 {
-	unsigned int rfclen = get_rfc1002_length(server->smallbuf);
+	unsigned int rfclen = get_rfc1002_length(buf);
 	int remaining = rfclen + 4 - server->total_read;
 
 	while (remaining > 0) {
@@ -1426,7 +1426,7 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 	int length;
 	struct cifs_readdata *rdata = mid->callback_data;
 
-	length = cifs_discard_remaining_data(server);
+	length = cifs_discard_remaining_data(server, mid->resp_buf);
 	dequeue_mid(mid, rdata->result);
 	return length;
 }
@@ -1459,7 +1459,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 
 	if (server->ops->is_status_pending &&
 	    server->ops->is_status_pending(buf, server, 0)) {
-		cifs_discard_remaining_data(server);
+		cifs_discard_remaining_data(server, buf);
 		return -1;
 	}
 
@@ -1519,6 +1519,9 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 	cifs_dbg(FYI, "0: iov_base=%p iov_len=%u\n",
 		 rdata->iov[0].iov_base, server->total_read);
 
+	mid->resp_buf = server->smallbuf;
+	server->smallbuf = NULL;
+
 	/* how much data is in the response? */
 	data_len = server->ops->read_data_length(buf);
 	if (data_offset + data_len > buflen) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 9ae695ae3ed7..0c7596cef4b8 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -904,10 +904,19 @@ cifs_demultiplex_thread(void *p)
 
 		server->lstrp = jiffies;
 		if (mid_entry != NULL) {
+			if ((mid_entry->mid_flags & MID_WAIT_CANCELLED) &&
+			     mid_entry->mid_state == MID_RESPONSE_RECEIVED &&
+					server->ops->handle_cancelled_mid)
+				server->ops->handle_cancelled_mid(
+							mid_entry->resp_buf,
+							server);
+
 			if (!mid_entry->multiRsp || mid_entry->multiEnd)
 				mid_entry->callback(mid_entry);
-		} else if (!server->ops->is_oplock_break ||
-			   !server->ops->is_oplock_break(buf, server)) {
+		} else if (server->ops->is_oplock_break &&
+			   server->ops->is_oplock_break(buf, server)) {
+			cifs_dbg(FYI, "Received oplock break\n");
+		} else {
 			cifs_dbg(VFS, "No task to wake, unknown frame received! NumMids %d\n",
 				 atomic_read(&midCount));
 			cifs_dump_mem("Received Data is: ", buf,
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index fd516ea8b8f8..1a04b3a5beb1 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -659,3 +659,49 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
 	cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n");
 	return false;
 }
+
+void
+smb2_cancelled_close_fid(struct work_struct *work)
+{
+	struct close_cancelled_open *cancelled = container_of(work,
+					struct close_cancelled_open, work);
+
+	cifs_dbg(VFS, "Close unmatched open\n");
+
+	SMB2_close(0, cancelled->tcon, cancelled->fid.persistent_fid,
+		   cancelled->fid.volatile_fid);
+	cifs_put_tcon(cancelled->tcon);
+	kfree(cancelled);
+}
+
+int
+smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server)
+{
+	struct smb2_sync_hdr *sync_hdr = get_sync_hdr(buffer);
+	struct smb2_create_rsp *rsp = (struct smb2_create_rsp *)buffer;
+	struct cifs_tcon *tcon;
+	struct close_cancelled_open *cancelled;
+
+	if (sync_hdr->Command != SMB2_CREATE ||
+	    sync_hdr->Status != STATUS_SUCCESS)
+		return 0;
+
+	cancelled = kzalloc(sizeof(*cancelled), GFP_KERNEL);
+	if (!cancelled)
+		return -ENOMEM;
+
+	tcon = smb2_find_smb_tcon(server, sync_hdr->SessionId,
+				  sync_hdr->TreeId);
+	if (!tcon) {
+		kfree(cancelled);
+		return -ENOENT;
+	}
+
+	cancelled->fid.persistent_fid = rsp->PersistentFileId;
+	cancelled->fid.volatile_fid = rsp->VolatileFileId;
+	cancelled->tcon = tcon;
+	INIT_WORK(&cancelled->work, smb2_cancelled_close_fid);
+	queue_work(cifsiod_wq, &cancelled->work);
+
+	return 0;
+}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 0231108d9387..b6bdf93042eb 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2188,7 +2188,7 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
 	if (rc)
 		goto free_pages;
 
-	rc = cifs_discard_remaining_data(server);
+	rc = cifs_discard_remaining_data(server, buf);
 	if (rc)
 		goto free_pages;
 
@@ -2214,7 +2214,7 @@ free_pages:
 	kfree(pages);
 	return rc;
 discard_data:
-	cifs_discard_remaining_data(server);
+	cifs_discard_remaining_data(server, buf);
 	goto free_pages;
 }
 
@@ -2322,6 +2322,7 @@ struct smb_version_operations smb20_operations = {
 	.clear_stats = smb2_clear_stats,
 	.print_stats = smb2_print_stats,
 	.is_oplock_break = smb2_is_valid_oplock_break,
+	.handle_cancelled_mid = smb2_handle_cancelled_mid,
 	.downgrade_oplock = smb2_downgrade_oplock,
 	.need_neg = smb2_need_neg,
 	.negotiate = smb2_negotiate,
@@ -2404,6 +2405,7 @@ struct smb_version_operations smb21_operations = {
 	.clear_stats = smb2_clear_stats,
 	.print_stats = smb2_print_stats,
 	.is_oplock_break = smb2_is_valid_oplock_break,
+	.handle_cancelled_mid = smb2_handle_cancelled_mid,
 	.downgrade_oplock = smb2_downgrade_oplock,
 	.need_neg = smb2_need_neg,
 	.negotiate = smb2_negotiate,
@@ -2488,6 +2490,7 @@ struct smb_version_operations smb30_operations = {
 	.print_stats = smb2_print_stats,
 	.dump_share_caps = smb2_dump_share_caps,
 	.is_oplock_break = smb2_is_valid_oplock_break,
+	.handle_cancelled_mid = smb2_handle_cancelled_mid,
 	.downgrade_oplock = smb2_downgrade_oplock,
 	.need_neg = smb2_need_neg,
 	.negotiate = smb2_negotiate,
@@ -2582,6 +2585,7 @@ struct smb_version_operations smb311_operations = {
 	.print_stats = smb2_print_stats,
 	.dump_share_caps = smb2_dump_share_caps,
 	.is_oplock_break = smb2_is_valid_oplock_break,
+	.handle_cancelled_mid = smb2_handle_cancelled_mid,
 	.downgrade_oplock = smb2_downgrade_oplock,
 	.need_neg = smb2_need_neg,
 	.negotiate = smb2_negotiate,
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 69e35873b1de..6853454fc871 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -48,6 +48,10 @@ extern struct mid_q_entry *smb2_setup_request(struct cifs_ses *ses,
 			      struct smb_rqst *rqst);
 extern struct mid_q_entry *smb2_setup_async_request(
 			struct TCP_Server_Info *server, struct smb_rqst *rqst);
+extern struct cifs_ses *smb2_find_smb_ses(struct TCP_Server_Info *server,
+					   __u64 ses_id);
+extern struct cifs_tcon *smb2_find_smb_tcon(struct TCP_Server_Info *server,
+						__u64 ses_id, __u32  tid);
 extern int smb2_calc_signature(struct smb_rqst *rqst,
 				struct TCP_Server_Info *server);
 extern int smb3_calc_signature(struct smb_rqst *rqst,
@@ -164,6 +168,9 @@ extern int SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
 extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
 			     const u64 persistent_fid, const u64 volatile_fid,
 			     const __u8 oplock_level);
+extern int smb2_handle_cancelled_mid(char *buffer,
+					struct TCP_Server_Info *server);
+void smb2_cancelled_close_fid(struct work_struct *work);
 extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
 			 u64 persistent_file_id, u64 volatile_file_id,
 			 struct kstatfs *FSData);
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 7c3bb1bd7eed..506b67fc93d9 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -115,23 +115,70 @@ smb3_crypto_shash_allocate(struct TCP_Server_Info *server)
 	return 0;
 }
 
+static struct cifs_ses *
+smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id)
+{
+	struct cifs_ses *ses;
+
+	list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+		if (ses->Suid != ses_id)
+			continue;
+		return ses;
+	}
+
+	return NULL;
+}
+
 struct cifs_ses *
 smb2_find_smb_ses(struct TCP_Server_Info *server, __u64 ses_id)
 {
 	struct cifs_ses *ses;
 
 	spin_lock(&cifs_tcp_ses_lock);
-	list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
-		if (ses->Suid != ses_id)
-			continue;
-		spin_unlock(&cifs_tcp_ses_lock);
-		return ses;
-	}
+	ses = smb2_find_smb_ses_unlocked(server, ses_id);
 	spin_unlock(&cifs_tcp_ses_lock);
 
+	return ses;
+}
+
+static struct cifs_tcon *
+smb2_find_smb_sess_tcon_unlocked(struct cifs_ses *ses, __u32  tid)
+{
+	struct cifs_tcon *tcon;
+
+	list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+		if (tcon->tid != tid)
+			continue;
+		++tcon->tc_count;
+		return tcon;
+	}
+
 	return NULL;
 }
 
+/*
+ * Obtain tcon corresponding to the tid in the given
+ * cifs_ses
+ */
+
+struct cifs_tcon *
+smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32  tid)
+{
+	struct cifs_ses *ses;
+	struct cifs_tcon *tcon;
+
+	spin_lock(&cifs_tcp_ses_lock);
+	ses = smb2_find_smb_ses_unlocked(server, ses_id);
+	if (!ses) {
+		spin_unlock(&cifs_tcp_ses_lock);
+		return NULL;
+	}
+	tcon = smb2_find_smb_sess_tcon_unlocked(ses, tid);
+	spin_unlock(&cifs_tcp_ses_lock);
+
+	return tcon;
+}
+
 int
 smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 {
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 526f0533cb4e..f6e13a977fc8 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -752,9 +752,11 @@ cifs_send_recv(const unsigned int xid, struct cifs_ses *ses,
 
 	rc = wait_for_response(ses->server, midQ);
 	if (rc != 0) {
+		cifs_dbg(FYI, "Cancelling wait for mid %llu\n",	midQ->mid);
 		send_cancel(ses->server, rqst, midQ);
 		spin_lock(&GlobalMid_Lock);
 		if (midQ->mid_state == MID_REQUEST_SUBMITTED) {
+			midQ->mid_flags |= MID_WAIT_CANCELLED;
 			midQ->callback = DeleteMidQEntry;
 			spin_unlock(&GlobalMid_Lock);
 			add_credits(ses->server, 1, optype);

From 312bbc5946c4b73dfc1d64c1dd5b0f9df8016587 Mon Sep 17 00:00:00 2001
From: Sachin Prabhu <sprabhu@redhat.com>
Date: Tue, 4 Apr 2017 02:12:04 -0500
Subject: [PATCH 269/410] SMB3: Rename clone_range to copychunk_range

Server side copy is one of the most important mechanisms smb2/smb3
supports and it was unintentionally disabled for most use cases.

Renaming calls to reflect the underlying smb2 ioctl called. This is
similar to the name duplicate_extents used for a similar ioctl which is
also used to duplicate files by reusing fs blocks. The name change is to
avoid confusion.

Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
CC: Stable <stable@vger.kernel.org>
Signed-off-by: Steve French <smfrench@gmail.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/cifsglob.h |  3 ++-
 fs/cifs/ioctl.c    | 16 ++++++++--------
 fs/cifs/smb2ops.c  | 12 ++++++------
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index c34bdb12c8e6..57c594827cb3 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -408,7 +408,8 @@ struct smb_version_operations {
 	char * (*create_lease_buf)(u8 *, u8);
 	/* parse lease context buffer and return oplock/epoch info */
 	__u8 (*parse_lease_buf)(void *, unsigned int *);
-	int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file,
+	int (*copychunk_range)(const unsigned int,
+			struct cifsFileInfo *src_file,
 			struct cifsFileInfo *target_file, u64 src_off, u64 len,
 			u64 dest_off);
 	int (*duplicate_extents)(const unsigned int, struct cifsFileInfo *src,
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 001528781b6b..9bf0f94fae63 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -34,7 +34,7 @@
 #include "cifs_ioctl.h"
 #include <linux/btrfs.h>
 
-static int cifs_file_clone_range(unsigned int xid, struct file *src_file,
+static int cifs_file_copychunk_range(unsigned int xid, struct file *src_file,
 			  struct file *dst_file)
 {
 	struct inode *src_inode = file_inode(src_file);
@@ -45,7 +45,7 @@ static int cifs_file_clone_range(unsigned int xid, struct file *src_file,
 	struct cifs_tcon *target_tcon;
 	int rc;
 
-	cifs_dbg(FYI, "ioctl clone range\n");
+	cifs_dbg(FYI, "ioctl copychunk range\n");
 
 	if (!src_file->private_data || !dst_file->private_data) {
 		rc = -EBADF;
@@ -75,8 +75,8 @@ static int cifs_file_clone_range(unsigned int xid, struct file *src_file,
 	/* should we flush first and last page first */
 	truncate_inode_pages(&target_inode->i_data, 0);
 
-	if (target_tcon->ses->server->ops->clone_range)
-		rc = target_tcon->ses->server->ops->clone_range(xid,
+	if (target_tcon->ses->server->ops->copychunk_range)
+		rc = target_tcon->ses->server->ops->copychunk_range(xid,
 			smb_file_src, smb_file_target, 0, src_inode->i_size, 0);
 	else
 		rc = -EOPNOTSUPP;
@@ -91,14 +91,14 @@ out:
 	return rc;
 }
 
-static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
+static long cifs_ioctl_copychunk(unsigned int xid, struct file *dst_file,
 			unsigned long srcfd)
 {
 	int rc;
 	struct fd src_file;
 	struct inode *src_inode;
 
-	cifs_dbg(FYI, "ioctl clone range\n");
+	cifs_dbg(FYI, "ioctl copychunk range\n");
 	/* the destination must be opened for writing */
 	if (!(dst_file->f_mode & FMODE_WRITE)) {
 		cifs_dbg(FYI, "file target not open for write\n");
@@ -129,7 +129,7 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
 	if (S_ISDIR(src_inode->i_mode))
 		goto out_fput;
 
-	rc = cifs_file_clone_range(xid, src_file.file, dst_file);
+	rc = cifs_file_copychunk_range(xid, src_file.file, dst_file);
 
 out_fput:
 	fdput(src_file);
@@ -251,7 +251,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 			}
 			break;
 		case CIFS_IOC_COPYCHUNK_FILE:
-			rc = cifs_ioctl_clone(xid, filep, arg);
+			rc = cifs_ioctl_copychunk(xid, filep, arg);
 			break;
 		case CIFS_IOC_SET_INTEGRITY:
 			if (pSMBFile == NULL)
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index b6bdf93042eb..3f12e0992b9b 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -593,7 +593,7 @@ req_res_key_exit:
 }
 
 static int
-smb2_clone_range(const unsigned int xid,
+smb2_copychunk_range(const unsigned int xid,
 			struct cifsFileInfo *srcfile,
 			struct cifsFileInfo *trgtfile, u64 src_off,
 			u64 len, u64 dest_off)
@@ -611,7 +611,7 @@ smb2_clone_range(const unsigned int xid,
 	if (pcchunk == NULL)
 		return -ENOMEM;
 
-	cifs_dbg(FYI, "in smb2_clone_range - about to call request res key\n");
+	cifs_dbg(FYI, "in smb2_copychunk_range - about to call request res key\n");
 	/* Request a key from the server to identify the source of the copy */
 	rc = SMB2_request_res_key(xid, tlink_tcon(srcfile->tlink),
 				srcfile->fid.persistent_fid,
@@ -2378,7 +2378,7 @@ struct smb_version_operations smb20_operations = {
 	.set_oplock_level = smb2_set_oplock_level,
 	.create_lease_buf = smb2_create_lease_buf,
 	.parse_lease_buf = smb2_parse_lease_buf,
-	.clone_range = smb2_clone_range,
+	.copychunk_range = smb2_copychunk_range,
 	.wp_retry_size = smb2_wp_retry_size,
 	.dir_needs_close = smb2_dir_needs_close,
 	.get_dfs_refer = smb2_get_dfs_refer,
@@ -2461,7 +2461,7 @@ struct smb_version_operations smb21_operations = {
 	.set_oplock_level = smb21_set_oplock_level,
 	.create_lease_buf = smb2_create_lease_buf,
 	.parse_lease_buf = smb2_parse_lease_buf,
-	.clone_range = smb2_clone_range,
+	.copychunk_range = smb2_copychunk_range,
 	.wp_retry_size = smb2_wp_retry_size,
 	.dir_needs_close = smb2_dir_needs_close,
 	.enum_snapshots = smb3_enum_snapshots,
@@ -2548,7 +2548,7 @@ struct smb_version_operations smb30_operations = {
 	.set_oplock_level = smb3_set_oplock_level,
 	.create_lease_buf = smb3_create_lease_buf,
 	.parse_lease_buf = smb3_parse_lease_buf,
-	.clone_range = smb2_clone_range,
+	.copychunk_range = smb2_copychunk_range,
 	.duplicate_extents = smb2_duplicate_extents,
 	.validate_negotiate = smb3_validate_negotiate,
 	.wp_retry_size = smb2_wp_retry_size,
@@ -2643,7 +2643,7 @@ struct smb_version_operations smb311_operations = {
 	.set_oplock_level = smb3_set_oplock_level,
 	.create_lease_buf = smb3_create_lease_buf,
 	.parse_lease_buf = smb3_parse_lease_buf,
-	.clone_range = smb2_clone_range,
+	.copychunk_range = smb2_copychunk_range,
 	.duplicate_extents = smb2_duplicate_extents,
 /*	.validate_negotiate = smb3_validate_negotiate, */ /* not used in 3.11 */
 	.wp_retry_size = smb2_wp_retry_size,

From 620d8745b35daaf507186c26b40c7ea02aed131e Mon Sep 17 00:00:00 2001
From: Sachin Prabhu <sprabhu@redhat.com>
Date: Fri, 10 Feb 2017 16:03:51 +0530
Subject: [PATCH 270/410] Introduce cifs_copy_file_range()

The earlier changes to copy range for cifs unintentionally disabled the more
common form of server side copy.

The patch introduces the file_operations helper cifs_copy_file_range()
which is used by the syscall copy_file_range. The new file operations
helper allows us to perform server side copies for SMB2.0 and 2.1
servers as well as SMB 3.0+ servers which do not support the ioctl
FSCTL_DUPLICATE_EXTENTS_TO_FILE.

The new helper uses the ioctl FSCTL_SRV_COPYCHUNK_WRITE to perform
server side copies. The helper is called by vfs_copy_file_range() only
once an attempt to clone the file using the ioctl
FSCTL_DUPLICATE_EXTENTS_TO_FILE has failed.

Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
CC: Stable  <stable@vger.kernel.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsfs.c   | 87 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/cifsfs.h   |  5 +++
 fs/cifs/cifsglob.h |  6 ++--
 fs/cifs/ioctl.c    | 60 ++------------------------------
 fs/cifs/smb2ops.c  | 20 +++++++----
 5 files changed, 110 insertions(+), 68 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 15e1db8738ae..dd3f5fabfdf6 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -972,6 +972,86 @@ out:
 	return rc;
 }
 
+ssize_t cifs_file_copychunk_range(unsigned int xid,
+				struct file *src_file, loff_t off,
+				struct file *dst_file, loff_t destoff,
+				size_t len, unsigned int flags)
+{
+	struct inode *src_inode = file_inode(src_file);
+	struct inode *target_inode = file_inode(dst_file);
+	struct cifsFileInfo *smb_file_src;
+	struct cifsFileInfo *smb_file_target;
+	struct cifs_tcon *src_tcon;
+	struct cifs_tcon *target_tcon;
+	ssize_t rc;
+
+	cifs_dbg(FYI, "copychunk range\n");
+
+	if (src_inode == target_inode) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	if (!src_file->private_data || !dst_file->private_data) {
+		rc = -EBADF;
+		cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
+		goto out;
+	}
+
+	rc = -EXDEV;
+	smb_file_target = dst_file->private_data;
+	smb_file_src = src_file->private_data;
+	src_tcon = tlink_tcon(smb_file_src->tlink);
+	target_tcon = tlink_tcon(smb_file_target->tlink);
+
+	if (src_tcon->ses != target_tcon->ses) {
+		cifs_dbg(VFS, "source and target of copy not on same server\n");
+		goto out;
+	}
+
+	/*
+	 * Note: cifs case is easier than btrfs since server responsible for
+	 * checks for proper open modes and file type and if it wants
+	 * server could even support copy of range where source = target
+	 */
+	lock_two_nondirectories(target_inode, src_inode);
+
+	cifs_dbg(FYI, "about to flush pages\n");
+	/* should we flush first and last page first */
+	truncate_inode_pages(&target_inode->i_data, 0);
+
+	if (target_tcon->ses->server->ops->copychunk_range)
+		rc = target_tcon->ses->server->ops->copychunk_range(xid,
+			smb_file_src, smb_file_target, off, len, destoff);
+	else
+		rc = -EOPNOTSUPP;
+
+	/* force revalidate of size and timestamps of target file now
+	 * that target is updated on the server
+	 */
+	CIFS_I(target_inode)->time = 0;
+	/* although unlocking in the reverse order from locking is not
+	 * strictly necessary here it is a little cleaner to be consistent
+	 */
+	unlock_two_nondirectories(src_inode, target_inode);
+
+out:
+	return rc;
+}
+
+static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
+				struct file *dst_file, loff_t destoff,
+				size_t len, unsigned int flags)
+{
+	unsigned int xid = get_xid();
+	ssize_t rc;
+
+	rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff,
+					len, flags);
+	free_xid(xid);
+	return rc;
+}
+
 const struct file_operations cifs_file_ops = {
 	.read_iter = cifs_loose_read_iter,
 	.write_iter = cifs_file_write_iter,
@@ -984,6 +1064,7 @@ const struct file_operations cifs_file_ops = {
 	.splice_read = generic_file_splice_read,
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
+	.copy_file_range = cifs_copy_file_range,
 	.clone_file_range = cifs_clone_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
@@ -1001,6 +1082,7 @@ const struct file_operations cifs_file_strict_ops = {
 	.splice_read = generic_file_splice_read,
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
+	.copy_file_range = cifs_copy_file_range,
 	.clone_file_range = cifs_clone_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
@@ -1018,6 +1100,7 @@ const struct file_operations cifs_file_direct_ops = {
 	.mmap = cifs_file_mmap,
 	.splice_read = generic_file_splice_read,
 	.unlocked_ioctl  = cifs_ioctl,
+	.copy_file_range = cifs_copy_file_range,
 	.clone_file_range = cifs_clone_file_range,
 	.llseek = cifs_llseek,
 	.setlease = cifs_setlease,
@@ -1035,6 +1118,7 @@ const struct file_operations cifs_file_nobrl_ops = {
 	.splice_read = generic_file_splice_read,
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
+	.copy_file_range = cifs_copy_file_range,
 	.clone_file_range = cifs_clone_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
@@ -1051,6 +1135,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
 	.splice_read = generic_file_splice_read,
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
+	.copy_file_range = cifs_copy_file_range,
 	.clone_file_range = cifs_clone_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
@@ -1067,6 +1152,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
 	.mmap = cifs_file_mmap,
 	.splice_read = generic_file_splice_read,
 	.unlocked_ioctl  = cifs_ioctl,
+	.copy_file_range = cifs_copy_file_range,
 	.clone_file_range = cifs_clone_file_range,
 	.llseek = cifs_llseek,
 	.setlease = cifs_setlease,
@@ -1078,6 +1164,7 @@ const struct file_operations cifs_dir_ops = {
 	.release = cifs_closedir,
 	.read    = generic_read_dir,
 	.unlocked_ioctl  = cifs_ioctl,
+	.copy_file_range = cifs_copy_file_range,
 	.clone_file_range = cifs_clone_file_range,
 	.llseek = generic_file_llseek,
 };
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index da717fee3026..30bf89b1fd9a 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -139,6 +139,11 @@ extern ssize_t	cifs_listxattr(struct dentry *, char *, size_t);
 # define cifs_listxattr NULL
 #endif
 
+extern ssize_t cifs_file_copychunk_range(unsigned int xid,
+					struct file *src_file, loff_t off,
+					struct file *dst_file, loff_t destoff,
+					size_t len, unsigned int flags);
+
 extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 #ifdef CONFIG_CIFS_NFSD_EXPORT
 extern const struct export_operations cifs_export_ops;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 57c594827cb3..d07f13a63369 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -408,10 +408,10 @@ struct smb_version_operations {
 	char * (*create_lease_buf)(u8 *, u8);
 	/* parse lease context buffer and return oplock/epoch info */
 	__u8 (*parse_lease_buf)(void *, unsigned int *);
-	int (*copychunk_range)(const unsigned int,
+	ssize_t (*copychunk_range)(const unsigned int,
 			struct cifsFileInfo *src_file,
-			struct cifsFileInfo *target_file, u64 src_off, u64 len,
-			u64 dest_off);
+			struct cifsFileInfo *target_file,
+			u64 src_off, u64 len, u64 dest_off);
 	int (*duplicate_extents)(const unsigned int, struct cifsFileInfo *src,
 			struct cifsFileInfo *target_file, u64 src_off, u64 len,
 			u64 dest_off);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 9bf0f94fae63..265c45fe4ea5 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -34,63 +34,6 @@
 #include "cifs_ioctl.h"
 #include <linux/btrfs.h>
 
-static int cifs_file_copychunk_range(unsigned int xid, struct file *src_file,
-			  struct file *dst_file)
-{
-	struct inode *src_inode = file_inode(src_file);
-	struct inode *target_inode = file_inode(dst_file);
-	struct cifsFileInfo *smb_file_src;
-	struct cifsFileInfo *smb_file_target;
-	struct cifs_tcon *src_tcon;
-	struct cifs_tcon *target_tcon;
-	int rc;
-
-	cifs_dbg(FYI, "ioctl copychunk range\n");
-
-	if (!src_file->private_data || !dst_file->private_data) {
-		rc = -EBADF;
-		cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
-		goto out;
-	}
-
-	rc = -EXDEV;
-	smb_file_target = dst_file->private_data;
-	smb_file_src = src_file->private_data;
-	src_tcon = tlink_tcon(smb_file_src->tlink);
-	target_tcon = tlink_tcon(smb_file_target->tlink);
-
-	if (src_tcon->ses != target_tcon->ses) {
-		cifs_dbg(VFS, "source and target of copy not on same server\n");
-		goto out;
-	}
-
-	/*
-	 * Note: cifs case is easier than btrfs since server responsible for
-	 * checks for proper open modes and file type and if it wants
-	 * server could even support copy of range where source = target
-	 */
-	lock_two_nondirectories(target_inode, src_inode);
-
-	cifs_dbg(FYI, "about to flush pages\n");
-	/* should we flush first and last page first */
-	truncate_inode_pages(&target_inode->i_data, 0);
-
-	if (target_tcon->ses->server->ops->copychunk_range)
-		rc = target_tcon->ses->server->ops->copychunk_range(xid,
-			smb_file_src, smb_file_target, 0, src_inode->i_size, 0);
-	else
-		rc = -EOPNOTSUPP;
-
-	/* force revalidate of size and timestamps of target file now
-	   that target is updated on the server */
-	CIFS_I(target_inode)->time = 0;
-	/* although unlocking in the reverse order from locking is not
-	   strictly necessary here it is a little cleaner to be consistent */
-	unlock_two_nondirectories(src_inode, target_inode);
-out:
-	return rc;
-}
-
 static long cifs_ioctl_copychunk(unsigned int xid, struct file *dst_file,
 			unsigned long srcfd)
 {
@@ -129,7 +72,8 @@ static long cifs_ioctl_copychunk(unsigned int xid, struct file *dst_file,
 	if (S_ISDIR(src_inode->i_mode))
 		goto out_fput;
 
-	rc = cifs_file_copychunk_range(xid, src_file.file, dst_file);
+	rc = cifs_file_copychunk_range(xid, src_file.file, 0, dst_file, 0,
+					src_inode->i_size, 0);
 
 out_fput:
 	fdput(src_file);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 3f12e0992b9b..063e59d543f9 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -592,7 +592,7 @@ req_res_key_exit:
 	return rc;
 }
 
-static int
+static ssize_t
 smb2_copychunk_range(const unsigned int xid,
 			struct cifsFileInfo *srcfile,
 			struct cifsFileInfo *trgtfile, u64 src_off,
@@ -605,6 +605,7 @@ smb2_copychunk_range(const unsigned int xid,
 	struct cifs_tcon *tcon;
 	int chunks_copied = 0;
 	bool chunk_sizes_updated = false;
+	ssize_t bytes_written, total_bytes_written = 0;
 
 	pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL);
 
@@ -669,14 +670,16 @@ smb2_copychunk_range(const unsigned int xid,
 			}
 			chunks_copied++;
 
-			src_off += le32_to_cpu(retbuf->TotalBytesWritten);
-			dest_off += le32_to_cpu(retbuf->TotalBytesWritten);
-			len -= le32_to_cpu(retbuf->TotalBytesWritten);
+			bytes_written = le32_to_cpu(retbuf->TotalBytesWritten);
+			src_off += bytes_written;
+			dest_off += bytes_written;
+			len -= bytes_written;
+			total_bytes_written += bytes_written;
 
-			cifs_dbg(FYI, "Chunks %d PartialChunk %d Total %d\n",
+			cifs_dbg(FYI, "Chunks %d PartialChunk %d Total %zu\n",
 				le32_to_cpu(retbuf->ChunksWritten),
 				le32_to_cpu(retbuf->ChunkBytesWritten),
-				le32_to_cpu(retbuf->TotalBytesWritten));
+				bytes_written);
 		} else if (rc == -EINVAL) {
 			if (ret_data_len != sizeof(struct copychunk_ioctl_rsp))
 				goto cchunk_out;
@@ -713,7 +716,10 @@ smb2_copychunk_range(const unsigned int xid,
 cchunk_out:
 	kfree(pcchunk);
 	kfree(retbuf);
-	return rc;
+	if (rc)
+		return rc;
+	else
+		return total_bytes_written;
 }
 
 static int

From 4fa8e504e5c0d7db9280ac96a4ac92192f1041f5 Mon Sep 17 00:00:00 2001
From: Tobias Regnery <tobias.regnery@gmail.com>
Date: Thu, 30 Mar 2017 12:34:14 +0200
Subject: [PATCH 271/410] CIFS: Fix build failure with smb2

I saw the following build error during a randconfig build:

fs/cifs/smb2ops.c: In function 'smb2_new_lease_key':
fs/cifs/smb2ops.c:1104:2: error: implicit declaration of function 'generate_random_uuid' [-Werror=implicit-function-declaration]

Explicit include the right header to fix this issue.

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2ops.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 063e59d543f9..7b12a727947e 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -21,6 +21,7 @@
 #include <linux/vfs.h>
 #include <linux/falloc.h>
 #include <linux/scatterlist.h>
+#include <linux/uuid.h>
 #include <crypto/aead.h>
 #include "cifsglob.h"
 #include "smb2pdu.h"

From 806a28efe9b78ffae5e2757e1ee924b8e50c08ab Mon Sep 17 00:00:00 2001
From: Jan-Marek Glogowski <glogow@fbihome.de>
Date: Mon, 20 Feb 2017 12:25:58 +0100
Subject: [PATCH 272/410] Reset TreeId to zero on SMB2 TREE_CONNECT

Currently the cifs module breaks the CIFS specs on reconnect as
described in http://msdn.microsoft.com/en-us/library/cc246529.aspx:

"TreeId (4 bytes): Uniquely identifies the tree connect for the
command. This MUST be 0 for the SMB2 TREE_CONNECT Request."

Signed-off-by: Jan-Marek Glogowski <glogow@fbihome.de>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Tested-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <smfrench@gmail.com>
CC: Stable <stable@vger.kernel.org>
---
 fs/cifs/smb2pdu.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 7446496850a3..66fa1b941cdf 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1185,6 +1185,10 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
 		return -EINVAL;
 	}
 
+	/* SMB2 TREE_CONNECT request must be called with TreeId == 0 */
+	if (tcon)
+		tcon->tid = 0;
+
 	rc = small_smb2_init(SMB2_TREE_CONNECT, tcon, (void **) &req);
 	if (rc) {
 		kfree(unc_path);

From 2e123b44a3c19de75f40ee0081d6d4fc04adfdc7 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 8 Mar 2017 02:14:25 +0200
Subject: [PATCH 273/410] virtio_net: enable big packets for large MTU values

If one enables e.g. jumbo frames without mergeable
buffers, packets won't fit in 1500 byte buffers
we use. Switch to big packet mode instead.
TODO: make sizing more exact, possibly extend small
packet mode to use larger pages.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/virtio_net.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index ea9890d61967..006f1a603102 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2367,6 +2367,10 @@ static int virtnet_probe(struct virtio_device *vdev)
 			dev->mtu = mtu;
 			dev->max_mtu = mtu;
 		}
+
+		/* TODO: size buffers correctly in this case. */
+		if (dev->mtu > ETH_DATA_LEN)
+			vi->big_packets = true;
 	}
 
 	if (vi->any_header_sg)

From 404123c2db798027e852480ed9c4accef9f1d9e6 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 29 Mar 2017 19:06:20 +0300
Subject: [PATCH 274/410] virtio: allow drivers to validate features

Some drivers can't support all features in all configurations.  At the
moment we blindly set FEATURES_OK and later FAILED.  Support this better
by adding a callback drivers can use to do some early checks.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio.c | 6 ++++++
 include/linux/virtio.h  | 1 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 400d70b69379..48230a5e12f2 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -232,6 +232,12 @@ static int virtio_dev_probe(struct device *_d)
 		if (device_features & (1ULL << i))
 			__virtio_set_bit(dev, i);
 
+	if (drv->validate) {
+		err = drv->validate(dev);
+		if (err)
+			goto err;
+	}
+
 	err = virtio_finalize_features(dev);
 	if (err)
 		goto err;
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 04b0d3f95043..7edfbdb55a99 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -167,6 +167,7 @@ struct virtio_driver {
 	unsigned int feature_table_size;
 	const unsigned int *feature_table_legacy;
 	unsigned int feature_table_size_legacy;
+	int (*validate)(struct virtio_device *dev);
 	int (*probe)(struct virtio_device *dev);
 	void (*scan)(struct virtio_device *dev);
 	void (*remove)(struct virtio_device *dev);

From fe36cbe0671e868cbd2f534a50ac60273fa5acf2 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 29 Mar 2017 19:09:14 +0300
Subject: [PATCH 275/410] virtio_net: clear MTU when out of range

virtio attempts to clear the MTU feature bit if the value is out of the
supported range, but this has no real effect since FEATURES_OK has
already been set.

Fix this up by checking the MTU in the new validate callback.

Fixes: 14de9d114a82 ("virtio-net: Add initial MTU advice feature")
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/virtio_net.c | 41 +++++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 006f1a603102..f36584616e7d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2230,14 +2230,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev)
 #define MIN_MTU ETH_MIN_MTU
 #define MAX_MTU ETH_MAX_MTU
 
-static int virtnet_probe(struct virtio_device *vdev)
+static int virtnet_validate(struct virtio_device *vdev)
 {
-	int i, err;
-	struct net_device *dev;
-	struct virtnet_info *vi;
-	u16 max_queue_pairs;
-	int mtu;
-
 	if (!vdev->config->get) {
 		dev_err(&vdev->dev, "%s failure: config access disabled\n",
 			__func__);
@@ -2247,6 +2241,25 @@ static int virtnet_probe(struct virtio_device *vdev)
 	if (!virtnet_validate_features(vdev))
 		return -EINVAL;
 
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
+		int mtu = virtio_cread16(vdev,
+					 offsetof(struct virtio_net_config,
+						  mtu));
+		if (mtu < MIN_MTU)
+			__virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
+	}
+
+	return 0;
+}
+
+static int virtnet_probe(struct virtio_device *vdev)
+{
+	int i, err;
+	struct net_device *dev;
+	struct virtnet_info *vi;
+	u16 max_queue_pairs;
+	int mtu;
+
 	/* Find if host supports multiqueue virtio_net device */
 	err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
 				   struct virtio_net_config,
@@ -2362,12 +2375,17 @@ static int virtnet_probe(struct virtio_device *vdev)
 				     offsetof(struct virtio_net_config,
 					      mtu));
 		if (mtu < dev->min_mtu) {
-			__virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
-		} else {
-			dev->mtu = mtu;
-			dev->max_mtu = mtu;
+			/* Should never trigger: MTU was previously validated
+			 * in virtnet_validate.
+			 */
+			dev_err(&vdev->dev, "device MTU appears to have changed "
+				"it is now %d < %d", mtu, dev->min_mtu);
+			goto free_stats;
 		}
 
+		dev->mtu = mtu;
+		dev->max_mtu = mtu;
+
 		/* TODO: size buffers correctly in this case. */
 		if (dev->mtu > ETH_DATA_LEN)
 			vi->big_packets = true;
@@ -2548,6 +2566,7 @@ static struct virtio_driver virtio_net_driver = {
 	.driver.name =	KBUILD_MODNAME,
 	.driver.owner =	THIS_MODULE,
 	.id_table =	id_table,
+	.validate =	virtnet_validate,
 	.probe =	virtnet_probe,
 	.remove =	virtnet_remove,
 	.config_changed = virtnet_config_changed,

From 2055997f983c6db7b5c3940ce5f8f822657d5bc3 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 29 Mar 2017 23:22:04 +0300
Subject: [PATCH 276/410] virtio_console: fix uninitialized variable use

We try to disable callbacks on c_ivq even without multiport
even though that vq is not initialized in this configuration.

Fixes: c743d09dbd01 ("virtio: console: Disable callbacks for virtqueues at start of S4 freeze")
Suggested-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/char/virtio_console.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index e9b7e0b3cabe..87fe111d0be6 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -2202,14 +2202,16 @@ static int virtcons_freeze(struct virtio_device *vdev)
 
 	vdev->config->reset(vdev);
 
-	virtqueue_disable_cb(portdev->c_ivq);
+	if (use_multiport(portdev))
+		virtqueue_disable_cb(portdev->c_ivq);
 	cancel_work_sync(&portdev->control_work);
 	cancel_work_sync(&portdev->config_work);
 	/*
 	 * Once more: if control_work_handler() was running, it would
 	 * enable the cb as the last step.
 	 */
-	virtqueue_disable_cb(portdev->c_ivq);
+	if (use_multiport(portdev))
+		virtqueue_disable_cb(portdev->c_ivq);
 	remove_controlq_data(portdev);
 
 	list_for_each_entry(port, &portdev->ports, list) {

From 404a5c392dccf1ebcfc54bc39d2c200eb30f8fff Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Thu, 30 Mar 2017 13:13:32 +0200
Subject: [PATCH 277/410] MAINTAINERS: fix virtio file pattern

The pattern did not catch include/linux/virtio.h.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 1b0a87ffffab..d08e3be4bb50 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13295,7 +13295,7 @@ F:	drivers/virtio/
 F:	tools/virtio/
 F:	drivers/net/virtio_net.c
 F:	drivers/block/virtio_blk.c
-F:	include/linux/virtio_*.h
+F:	include/linux/virtio*.h
 F:	include/uapi/linux/virtio_*.h
 F:	drivers/crypto/virtio/
 

From 81380ca10778b99dce98940cfc993214712df335 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Fri, 7 Apr 2017 08:56:26 -0600
Subject: [PATCH 278/410] blk-mq: use the right hctx when getting a driver tag
 fails

While dispatching requests, if we fail to get a driver tag, we mark the
hardware queue as waiting for a tag and put the requests on a
hctx->dispatch list to be run later when a driver tag is freed. However,
blk_mq_dispatch_rq_list() may dispatch requests from multiple hardware
queues if using a single-queue scheduler with a multiqueue device. If
blk_mq_get_driver_tag() fails, it doesn't update the hardware queue we
are processing. This means we end up using the hardware queue of the
previous request, which may or may not be the same as that of the
current request. If it isn't, the wrong hardware queue will end up
waiting for a tag, and the requests will be on the wrong dispatch list,
leading to a hang.

The fix is twofold:

1. Make sure we save which hardware queue we were trying to get a
   request for in blk_mq_get_driver_tag() regardless of whether it
   succeeds or not.
2. Make blk_mq_dispatch_rq_list() take a request_queue instead of a
   blk_mq_hw_queue to make it clear that it must handle multiple
   hardware queues, since I've already messed this up on a couple of
   occasions.

This didn't appear in testing with nvme and mq-deadline because nvme has
more driver tags than the default number of scheduler tags. However,
with the blk_mq_update_nr_hw_queues() fix, it showed up with nbd.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-sched.c |  9 +++++----
 block/blk-mq.c       | 25 +++++++++++++------------
 block/blk-mq.h       |  2 +-
 3 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 09af8ff18719..fc00f00898d3 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -171,7 +171,8 @@ void blk_mq_sched_put_request(struct request *rq)
 
 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 {
-	struct elevator_queue *e = hctx->queue->elevator;
+	struct request_queue *q = hctx->queue;
+	struct elevator_queue *e = q->elevator;
 	const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
 	bool did_work = false;
 	LIST_HEAD(rq_list);
@@ -203,10 +204,10 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 	 */
 	if (!list_empty(&rq_list)) {
 		blk_mq_sched_mark_restart_hctx(hctx);
-		did_work = blk_mq_dispatch_rq_list(hctx, &rq_list);
+		did_work = blk_mq_dispatch_rq_list(q, &rq_list);
 	} else if (!has_sched_dispatch) {
 		blk_mq_flush_busy_ctxs(hctx, &rq_list);
-		blk_mq_dispatch_rq_list(hctx, &rq_list);
+		blk_mq_dispatch_rq_list(q, &rq_list);
 	}
 
 	/*
@@ -222,7 +223,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 			if (!rq)
 				break;
 			list_add(&rq->queuelist, &rq_list);
-		} while (blk_mq_dispatch_rq_list(hctx, &rq_list));
+		} while (blk_mq_dispatch_rq_list(q, &rq_list));
 	}
 }
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 935f2cc7c8c3..828f4bd193f2 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -845,12 +845,8 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
 		.flags = wait ? 0 : BLK_MQ_REQ_NOWAIT,
 	};
 
-	if (rq->tag != -1) {
-done:
-		if (hctx)
-			*hctx = data.hctx;
-		return true;
-	}
+	if (rq->tag != -1)
+		goto done;
 
 	if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
 		data.flags |= BLK_MQ_REQ_RESERVED;
@@ -862,10 +858,12 @@ done:
 			atomic_inc(&data.hctx->nr_active);
 		}
 		data.hctx->tags->rqs[rq->tag] = rq;
-		goto done;
 	}
 
-	return false;
+done:
+	if (hctx)
+		*hctx = data.hctx;
+	return rq->tag != -1;
 }
 
 static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
@@ -962,14 +960,17 @@ static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx)
 	return true;
 }
 
-bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
+bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list)
 {
-	struct request_queue *q = hctx->queue;
+	struct blk_mq_hw_ctx *hctx;
 	struct request *rq;
 	LIST_HEAD(driver_list);
 	struct list_head *dptr;
 	int errors, queued, ret = BLK_MQ_RQ_QUEUE_OK;
 
+	if (list_empty(list))
+		return false;
+
 	/*
 	 * Start off with dptr being NULL, so we start the first request
 	 * immediately, even if we have more pending.
@@ -980,7 +981,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 	 * Now process all the entries, sending them to the driver.
 	 */
 	errors = queued = 0;
-	while (!list_empty(list)) {
+	do {
 		struct blk_mq_queue_data bd;
 
 		rq = list_first_entry(list, struct request, queuelist);
@@ -1051,7 +1052,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 		 */
 		if (!dptr && list->next != list->prev)
 			dptr = &driver_list;
-	}
+	} while (!list_empty(list));
 
 	hctx->dispatched[queued_to_index(queued)]++;
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index b79f9a7d8cf6..660a17e1d033 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -31,7 +31,7 @@ void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 void blk_mq_wake_waiters(struct request_queue *q);
-bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *, struct list_head *);
+bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *);
 void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
 bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx);
 bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,

From 6917ff0b5bd4139e08a3f3146529dcb3b95ba7a6 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Wed, 5 Apr 2017 12:01:30 -0700
Subject: [PATCH 279/410] blk-mq-sched: refactor scheduler initialization

Preparation cleanup for the next couple of fixes, push
blk_mq_sched_setup() and e->ops.mq.init_sched() into a helper.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-sched.c | 86 +++++++++++++++++++++++---------------------
 block/blk-mq-sched.h |  2 +-
 block/elevator.c     | 32 +++++++----------
 3 files changed, 59 insertions(+), 61 deletions(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index fc00f00898d3..6bd1758ea29b 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -432,51 +432,23 @@ static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
 	}
 }
 
-int blk_mq_sched_setup(struct request_queue *q)
+static int blk_mq_sched_alloc_tags(struct request_queue *q,
+				   struct blk_mq_hw_ctx *hctx,
+				   unsigned int hctx_idx)
 {
 	struct blk_mq_tag_set *set = q->tag_set;
-	struct blk_mq_hw_ctx *hctx;
-	int ret, i;
+	int ret;
 
-	/*
-	 * Default to 256, since we don't split into sync/async like the
-	 * old code did. Additionally, this is a per-hw queue depth.
-	 */
-	q->nr_requests = 2 * BLKDEV_MAX_RQ;
+	hctx->sched_tags = blk_mq_alloc_rq_map(set, hctx_idx, q->nr_requests,
+					       set->reserved_tags);
+	if (!hctx->sched_tags)
+		return -ENOMEM;
 
-	/*
-	 * We're switching to using an IO scheduler, so setup the hctx
-	 * scheduler tags and switch the request map from the regular
-	 * tags to scheduler tags. First allocate what we need, so we
-	 * can safely fail and fallback, if needed.
-	 */
-	ret = 0;
-	queue_for_each_hw_ctx(q, hctx, i) {
-		hctx->sched_tags = blk_mq_alloc_rq_map(set, i,
-				q->nr_requests, set->reserved_tags);
-		if (!hctx->sched_tags) {
-			ret = -ENOMEM;
-			break;
-		}
-		ret = blk_mq_alloc_rqs(set, hctx->sched_tags, i, q->nr_requests);
-		if (ret)
-			break;
-	}
+	ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
+	if (ret)
+		blk_mq_sched_free_tags(set, hctx, hctx_idx);
 
-	/*
-	 * If we failed, free what we did allocate
-	 */
-	if (ret) {
-		queue_for_each_hw_ctx(q, hctx, i) {
-			if (!hctx->sched_tags)
-				continue;
-			blk_mq_sched_free_tags(set, hctx, i);
-		}
-
-		return ret;
-	}
-
-	return 0;
+	return ret;
 }
 
 void blk_mq_sched_teardown(struct request_queue *q)
@@ -489,6 +461,40 @@ void blk_mq_sched_teardown(struct request_queue *q)
 		blk_mq_sched_free_tags(set, hctx, i);
 }
 
+int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned int i;
+	int ret;
+
+	if (!e) {
+		q->elevator = NULL;
+		return 0;
+	}
+
+	/*
+	 * Default to 256, since we don't split into sync/async like the
+	 * old code did. Additionally, this is a per-hw queue depth.
+	 */
+	q->nr_requests = 2 * BLKDEV_MAX_RQ;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		ret = blk_mq_sched_alloc_tags(q, hctx, i);
+		if (ret)
+			goto err;
+	}
+
+	ret = e->ops.mq.init_sched(q, e);
+	if (ret)
+		goto err;
+
+	return 0;
+
+err:
+	blk_mq_sched_teardown(q);
+	return ret;
+}
+
 int blk_mq_sched_init(struct request_queue *q)
 {
 	int ret;
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index a75b16b123f7..873f9af5a35b 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -32,7 +32,7 @@ void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
 			struct list_head *rq_list,
 			struct request *(*get_rq)(struct blk_mq_hw_ctx *));
 
-int blk_mq_sched_setup(struct request_queue *q);
+int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
 void blk_mq_sched_teardown(struct request_queue *q);
 
 int blk_mq_sched_init(struct request_queue *q);
diff --git a/block/elevator.c b/block/elevator.c
index 01139f549b5b..f236ef1d2be9 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -242,17 +242,12 @@ int elevator_init(struct request_queue *q, char *name)
 		}
 	}
 
-	if (e->uses_mq) {
-		err = blk_mq_sched_setup(q);
-		if (!err)
-			err = e->ops.mq.init_sched(q, e);
-	} else
+	if (e->uses_mq)
+		err = blk_mq_init_sched(q, e);
+	else
 		err = e->ops.sq.elevator_init_fn(q, e);
-	if (err) {
-		if (e->uses_mq)
-			blk_mq_sched_teardown(q);
+	if (err)
 		elevator_put(e);
-	}
 	return err;
 }
 EXPORT_SYMBOL(elevator_init);
@@ -987,21 +982,18 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	}
 
 	/* allocate, init and register new elevator */
-	if (new_e) {
-		if (new_e->uses_mq) {
-			err = blk_mq_sched_setup(q);
-			if (!err)
-				err = new_e->ops.mq.init_sched(q, new_e);
-		} else
-			err = new_e->ops.sq.elevator_init_fn(q, new_e);
-		if (err)
-			goto fail_init;
+	if (q->mq_ops)
+		err = blk_mq_init_sched(q, new_e);
+	else
+		err = new_e->ops.sq.elevator_init_fn(q, new_e);
+	if (err)
+		goto fail_init;
 
+	if (new_e) {
 		err = elv_register_queue(q);
 		if (err)
 			goto fail_register;
-	} else
-		q->elevator = NULL;
+	}
 
 	/* done, kill the old one and finish */
 	if (old) {

From 93252632e828da3e90241a1c0e766556abf71598 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Wed, 5 Apr 2017 12:01:31 -0700
Subject: [PATCH 280/410] blk-mq-sched: set up scheduler tags when bringing up
 new queues

If a new hardware queue is added at runtime, we don't allocate scheduler
tags for it, leading to a crash. This hooks up the scheduler framework
to blk_mq_{init,exit}_hctx() to make sure everything gets properly
initialized/freed.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-sched.c | 22 ++++++++++++++++++++++
 block/blk-mq-sched.h |  5 +++++
 block/blk-mq.c       |  9 ++++++++-
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 6bd1758ea29b..0bb13bb51daa 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -461,6 +461,28 @@ void blk_mq_sched_teardown(struct request_queue *q)
 		blk_mq_sched_free_tags(set, hctx, i);
 }
 
+int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
+			   unsigned int hctx_idx)
+{
+	struct elevator_queue *e = q->elevator;
+
+	if (!e)
+		return 0;
+
+	return blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
+}
+
+void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
+			    unsigned int hctx_idx)
+{
+	struct elevator_queue *e = q->elevator;
+
+	if (!e)
+		return;
+
+	blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
+}
+
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 {
 	struct blk_mq_hw_ctx *hctx;
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 873f9af5a35b..19db25e0c95a 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -35,6 +35,11 @@ void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
 void blk_mq_sched_teardown(struct request_queue *q);
 
+int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
+			   unsigned int hctx_idx);
+void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
+			    unsigned int hctx_idx);
+
 int blk_mq_sched_init(struct request_queue *q);
 
 static inline bool
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 828f4bd193f2..72e744cd638c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1924,6 +1924,8 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 				       hctx->fq->flush_rq, hctx_idx,
 				       flush_start_tag + hctx_idx);
 
+	blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
+
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);
 
@@ -1990,9 +1992,12 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	    set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
 		goto free_bitmap;
 
+	if (blk_mq_sched_init_hctx(q, hctx, hctx_idx))
+		goto exit_hctx;
+
 	hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
 	if (!hctx->fq)
-		goto exit_hctx;
+		goto sched_exit_hctx;
 
 	if (set->ops->init_request &&
 	    set->ops->init_request(set->driver_data,
@@ -2007,6 +2012,8 @@ static int blk_mq_init_hctx(struct request_queue *q,
 
  free_fq:
 	kfree(hctx->fq);
+ sched_exit_hctx:
+	blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
  exit_hctx:
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);

From 54d5329d425650fafaf90660a139c771d2d49cae Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Fri, 7 Apr 2017 08:52:27 -0600
Subject: [PATCH 281/410] blk-mq-sched: fix crash in switch error path

In elevator_switch(), if blk_mq_init_sched() fails, we attempt to fall
back to the original scheduler. However, at this point, we've already
torn down the original scheduler's tags, so this causes a crash. Doing
the fallback like the legacy elevator path is much harder for mq, so fix
it by just falling back to none, instead.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-sched.c     | 13 +++++-
 block/blk-mq-sched.h     |  2 +-
 block/blk-mq.c           |  2 -
 block/blk-sysfs.c        |  2 +-
 block/elevator.c         | 94 ++++++++++++++++++++++------------------
 include/linux/elevator.h |  2 +-
 6 files changed, 67 insertions(+), 48 deletions(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 0bb13bb51daa..e8c2ed654ef0 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -451,7 +451,7 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
 	return ret;
 }
 
-void blk_mq_sched_teardown(struct request_queue *q)
+static void blk_mq_sched_tags_teardown(struct request_queue *q)
 {
 	struct blk_mq_tag_set *set = q->tag_set;
 	struct blk_mq_hw_ctx *hctx;
@@ -513,10 +513,19 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 	return 0;
 
 err:
-	blk_mq_sched_teardown(q);
+	blk_mq_sched_tags_teardown(q);
+	q->elevator = NULL;
 	return ret;
 }
 
+void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
+{
+	if (e->type->ops.mq.exit_sched)
+		e->type->ops.mq.exit_sched(e);
+	blk_mq_sched_tags_teardown(q);
+	q->elevator = NULL;
+}
+
 int blk_mq_sched_init(struct request_queue *q)
 {
 	int ret;
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 19db25e0c95a..e704956e0862 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -33,7 +33,7 @@ void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
 			struct request *(*get_rq)(struct blk_mq_hw_ctx *));
 
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
-void blk_mq_sched_teardown(struct request_queue *q);
+void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
 
 int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
 			   unsigned int hctx_idx);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 72e744cd638c..cfb7c97b14ec 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2240,8 +2240,6 @@ void blk_mq_release(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	unsigned int i;
 
-	blk_mq_sched_teardown(q);
-
 	/* hctx kobj stays in hctx */
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (!hctx)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index c44b321335f3..37f0b3ad635e 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -816,7 +816,7 @@ static void blk_release_queue(struct kobject *kobj)
 
 	if (q->elevator) {
 		ioc_clear_queue(q);
-		elevator_exit(q->elevator);
+		elevator_exit(q, q->elevator);
 	}
 
 	blk_exit_rl(&q->root_rl);
diff --git a/block/elevator.c b/block/elevator.c
index f236ef1d2be9..dbeecf7be719 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -252,11 +252,11 @@ int elevator_init(struct request_queue *q, char *name)
 }
 EXPORT_SYMBOL(elevator_init);
 
-void elevator_exit(struct elevator_queue *e)
+void elevator_exit(struct request_queue *q, struct elevator_queue *e)
 {
 	mutex_lock(&e->sysfs_lock);
 	if (e->uses_mq && e->type->ops.mq.exit_sched)
-		e->type->ops.mq.exit_sched(e);
+		blk_mq_exit_sched(q, e);
 	else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn)
 		e->type->ops.sq.elevator_exit_fn(e);
 	mutex_unlock(&e->sysfs_lock);
@@ -941,6 +941,45 @@ void elv_unregister(struct elevator_type *e)
 }
 EXPORT_SYMBOL_GPL(elv_unregister);
 
+static int elevator_switch_mq(struct request_queue *q,
+			      struct elevator_type *new_e)
+{
+	int ret;
+
+	blk_mq_freeze_queue(q);
+	blk_mq_quiesce_queue(q);
+
+	if (q->elevator) {
+		if (q->elevator->registered)
+			elv_unregister_queue(q);
+		ioc_clear_queue(q);
+		elevator_exit(q, q->elevator);
+	}
+
+	ret = blk_mq_init_sched(q, new_e);
+	if (ret)
+		goto out;
+
+	if (new_e) {
+		ret = elv_register_queue(q);
+		if (ret) {
+			elevator_exit(q, q->elevator);
+			goto out;
+		}
+	}
+
+	if (new_e)
+		blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
+	else
+		blk_add_trace_msg(q, "elv switch: none");
+
+out:
+	blk_mq_unfreeze_queue(q);
+	blk_mq_start_stopped_hw_queues(q, true);
+	return ret;
+
+}
+
 /*
  * switch to new_e io scheduler. be careful not to introduce deadlocks -
  * we don't free the old io scheduler, before we have allocated what we
@@ -953,10 +992,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	bool old_registered = false;
 	int err;
 
-	if (q->mq_ops) {
-		blk_mq_freeze_queue(q);
-		blk_mq_quiesce_queue(q);
-	}
+	if (q->mq_ops)
+		return elevator_switch_mq(q, new_e);
 
 	/*
 	 * Turn on BYPASS and drain all requests w/ elevator private data.
@@ -968,11 +1005,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	if (old) {
 		old_registered = old->registered;
 
-		if (old->uses_mq)
-			blk_mq_sched_teardown(q);
-
-		if (!q->mq_ops)
-			blk_queue_bypass_start(q);
+		blk_queue_bypass_start(q);
 
 		/* unregister and clear all auxiliary data of the old elevator */
 		if (old_registered)
@@ -982,53 +1015,32 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	}
 
 	/* allocate, init and register new elevator */
-	if (q->mq_ops)
-		err = blk_mq_init_sched(q, new_e);
-	else
-		err = new_e->ops.sq.elevator_init_fn(q, new_e);
+	err = new_e->ops.sq.elevator_init_fn(q, new_e);
 	if (err)
 		goto fail_init;
 
-	if (new_e) {
-		err = elv_register_queue(q);
-		if (err)
-			goto fail_register;
-	}
+	err = elv_register_queue(q);
+	if (err)
+		goto fail_register;
 
 	/* done, kill the old one and finish */
 	if (old) {
-		elevator_exit(old);
-		if (!q->mq_ops)
-			blk_queue_bypass_end(q);
+		elevator_exit(q, old);
+		blk_queue_bypass_end(q);
 	}
 
-	if (q->mq_ops) {
-		blk_mq_unfreeze_queue(q);
-		blk_mq_start_stopped_hw_queues(q, true);
-	}
-
-	if (new_e)
-		blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
-	else
-		blk_add_trace_msg(q, "elv switch: none");
+	blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
 
 	return 0;
 
 fail_register:
-	if (q->mq_ops)
-		blk_mq_sched_teardown(q);
-	elevator_exit(q->elevator);
+	elevator_exit(q, q->elevator);
 fail_init:
 	/* switch failed, restore and re-register old elevator */
 	if (old) {
 		q->elevator = old;
 		elv_register_queue(q);
-		if (!q->mq_ops)
-			blk_queue_bypass_end(q);
-	}
-	if (q->mq_ops) {
-		blk_mq_unfreeze_queue(q);
-		blk_mq_start_stopped_hw_queues(q, true);
+		blk_queue_bypass_end(q);
 	}
 
 	return err;
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index aebecc4ed088..22d39e8d4de1 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -211,7 +211,7 @@ extern ssize_t elv_iosched_show(struct request_queue *, char *);
 extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
 
 extern int elevator_init(struct request_queue *, char *);
-extern void elevator_exit(struct elevator_queue *);
+extern void elevator_exit(struct request_queue *, struct elevator_queue *);
 extern int elevator_change(struct request_queue *, const char *);
 extern bool elv_bio_merge_ok(struct request *, struct bio *);
 extern struct elevator_queue *elevator_alloc(struct request_queue *,

From ebe8bddb6e30d7a02775b9972099271fc5910f37 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Fri, 7 Apr 2017 08:53:11 -0600
Subject: [PATCH 282/410] blk-mq: remap queues when adding/removing hardware
 queues

blk_mq_update_nr_hw_queues() used to remap hardware queues, which is the
behavior that drivers expect. However, commit 4e68a011428a changed
blk_mq_queue_reinit() to not remap queues for the case of CPU
hotplugging, inadvertently making blk_mq_update_nr_hw_queues() not remap
queues as well. This breaks, for example, NBD's multi-connection mode,
leaving the added hardware queues unused. Fix it by making
blk_mq_update_nr_hw_queues() explicitly remap the queues.

Fixes: 4e68a011428a ("blk-mq: don't redistribute hardware queues on a CPU hotplug event")
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index cfb7c97b14ec..f1be5c3eb600 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2570,6 +2570,14 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
 	return 0;
 }
 
+static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
+{
+	if (set->ops->map_queues)
+		return set->ops->map_queues(set);
+	else
+		return blk_mq_map_queues(set);
+}
+
 /*
  * Alloc a tag set to be associated with one or more request queues.
  * May fail with EINVAL for various error conditions. May adjust the
@@ -2624,10 +2632,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	if (!set->mq_map)
 		goto out_free_tags;
 
-	if (set->ops->map_queues)
-		ret = set->ops->map_queues(set);
-	else
-		ret = blk_mq_map_queues(set);
+	ret = blk_mq_update_queue_map(set);
 	if (ret)
 		goto out_free_mq_map;
 
@@ -2719,6 +2724,7 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
 		blk_mq_freeze_queue(q);
 
 	set->nr_hw_queues = nr_hw_queues;
+	blk_mq_update_queue_map(set);
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
 		blk_mq_realloc_hw_ctxs(set, q);
 

From cfc5b2b551d8c089079e754525839101b1b43624 Mon Sep 17 00:00:00 2001
From: Bastian Stender <bst@pengutronix.de>
Date: Fri, 7 Apr 2017 17:28:23 +0200
Subject: [PATCH 283/410] fbdev/ssd1307fb: fix optional VBAT support

SSD1306 needs VBAT when it is wired in charge pump configuration only.
Other controllers of the SSD1307 family do not need it at all. This was
introduced by commit ba14301e0356 ("fbdev/ssd1307fb: add support to
enable VBAT").

Without VBAT configuration the driver now fails with:

	failed to get VBAT regulator: -19

This is caused by misinterpretation of devm_regulator_get_optional
which "returns a struct regulator corresponding to the regulator
producer or IS_ERR() condition".

Handle -ENODEV without bailing out and making VBAT support really
optional.

Signed-off-by: Bastian Stender <bst@pengutronix.de>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: Jyri Sarha <jsarha@ti.com>
Cc: Roger Quadros <rogerq@ti.com>
[b.zolnierkie: minor fixups]
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/ssd1307fb.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/video/fbdev/ssd1307fb.c b/drivers/video/fbdev/ssd1307fb.c
index bd017b57c47f..f599520374dd 100644
--- a/drivers/video/fbdev/ssd1307fb.c
+++ b/drivers/video/fbdev/ssd1307fb.c
@@ -578,10 +578,14 @@ static int ssd1307fb_probe(struct i2c_client *client,
 
 	par->vbat_reg = devm_regulator_get_optional(&client->dev, "vbat");
 	if (IS_ERR(par->vbat_reg)) {
-		dev_err(&client->dev, "failed to get VBAT regulator: %ld\n",
-			PTR_ERR(par->vbat_reg));
 		ret = PTR_ERR(par->vbat_reg);
-		goto fb_alloc_error;
+		if (ret == -ENODEV) {
+			par->vbat_reg = NULL;
+		} else {
+			dev_err(&client->dev, "failed to get VBAT regulator: %d\n",
+				ret);
+			goto fb_alloc_error;
+		}
 	}
 
 	if (of_property_read_u32(node, "solomon,width", &par->width))
@@ -668,10 +672,13 @@ static int ssd1307fb_probe(struct i2c_client *client,
 		udelay(4);
 	}
 
-	ret = regulator_enable(par->vbat_reg);
-	if (ret) {
-		dev_err(&client->dev, "failed to enable VBAT: %d\n", ret);
-		goto reset_oled_error;
+	if (par->vbat_reg) {
+		ret = regulator_enable(par->vbat_reg);
+		if (ret) {
+			dev_err(&client->dev, "failed to enable VBAT: %d\n",
+				ret);
+			goto reset_oled_error;
+		}
 	}
 
 	ret = ssd1307fb_init(par);
@@ -710,7 +717,8 @@ panel_init_error:
 		pwm_put(par->pwm);
 	};
 regulator_enable_error:
-	regulator_disable(par->vbat_reg);
+	if (par->vbat_reg)
+		regulator_disable(par->vbat_reg);
 reset_oled_error:
 	fb_deferred_io_cleanup(info);
 fb_alloc_error:

From 9121b15b5628b38b4695282dc18c553440e0f79b Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Fri, 7 Apr 2017 17:28:23 +0200
Subject: [PATCH 284/410] xen, fbfront: fix connecting to backend

Connecting to the backend isn't working reliably in xen-fbfront: in
case XenbusStateInitWait of the backend has been missed the backend
transition to XenbusStateConnected will trigger the connected state
only without doing the actions required when the backend has
connected.

Cc: stable@vger.kernel.org
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/xen-fbfront.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/xen-fbfront.c b/drivers/video/fbdev/xen-fbfront.c
index d0115a7af0a9..3ee309c50b2d 100644
--- a/drivers/video/fbdev/xen-fbfront.c
+++ b/drivers/video/fbdev/xen-fbfront.c
@@ -643,7 +643,6 @@ static void xenfb_backend_changed(struct xenbus_device *dev,
 		break;
 
 	case XenbusStateInitWait:
-InitWait:
 		xenbus_switch_state(dev, XenbusStateConnected);
 		break;
 
@@ -654,7 +653,8 @@ InitWait:
 		 * get Connected twice here.
 		 */
 		if (dev->state != XenbusStateConnected)
-			goto InitWait; /* no InitWait seen yet, fudge it */
+			/* no InitWait seen yet, fudge it */
+			xenbus_switch_state(dev, XenbusStateConnected);
 
 		if (xenbus_read_unsigned(info->xbdev->otherend,
 					 "request-update", 0))

From 3c1a427954399fd1bda1ee7e1b356f47b61cee74 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 6 Apr 2017 09:51:51 +0200
Subject: [PATCH 285/410] perf annotate s390: Fix perf annotate error -95 (4.10
 regression)

since 4.10 perf annotate exits on s390 with an "unknown error -95".
Turns out that commit 786c1b51844d ("perf annotate: Start supporting
cross arch annotation") added a hard requirement for architecture
support when objdump is used but only provided x86 and arm support.
Meanwhile power was added so lets add s390 as well.

While at it make sure to implement the branch and jump types.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Andreas Krebbel <krebbel@linux.vnet.ibm.com>
Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-s390 <linux-s390@vger.kernel.org>
Cc: stable@kernel.org # v4.10+
Fixes: 786c1b51844 "perf annotate: Start supporting cross arch annotation"
Link: http://lkml.kernel.org/r/1491465112-45819-2-git-send-email-borntraeger@de.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/annotate.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 273f21fa32b5..7aa57225cbf7 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -130,6 +130,12 @@ static struct arch architectures[] = {
 		.name = "powerpc",
 		.init = powerpc__annotate_init,
 	},
+	{
+		.name = "s390",
+		.objdump =  {
+			.comment_char = '#',
+		},
+	},
 };
 
 static void ins__delete(struct ins_operands *ops)

From 1680a3868f00be638a8a213a321e88d11ce7e9f7 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Fri, 7 Apr 2017 23:51:05 +0800
Subject: [PATCH 286/410] sysctl: add sanity check for proc_douintvec

Commit e7d316a02f68 ("sysctl: handle error writing UINT_MAX to u32
fields") introduced the proc_douintvec helper function, but it forgot to
add the related sanity check when doing register_sysctl_table.  So add
it now.

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Cc: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/proc_sysctl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 8f91ec66baa3..d04ea4349909 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1074,6 +1074,7 @@ static int sysctl_check_table(const char *path, struct ctl_table *table)
 
 		if ((table->proc_handler == proc_dostring) ||
 		    (table->proc_handler == proc_dointvec) ||
+		    (table->proc_handler == proc_douintvec) ||
 		    (table->proc_handler == proc_dointvec_minmax) ||
 		    (table->proc_handler == proc_dointvec_jiffies) ||
 		    (table->proc_handler == proc_dointvec_userhz_jiffies) ||

From 5380e5644afbba9e3d229c36771134976f05c91e Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Fri, 7 Apr 2017 23:51:06 +0800
Subject: [PATCH 287/410] sysctl: don't print negative flag for proc_douintvec

I saw some very confusing sysctl output on my system:
  # cat /proc/sys/net/core/xfrm_aevent_rseqth
  -2
  # cat /proc/sys/net/core/xfrm_aevent_etime
  -10
  # cat /proc/sys/net/ipv4/tcp_notsent_lowat
  -4294967295

Because we forget to set the *negp flag in proc_douintvec, so it will
become a garbage value.

Since the value related to proc_douintvec is always an unsigned integer,
so we can set *negp to false explictily to fix this issue.

Fixes: e7d316a02f68 ("sysctl: handle error writing UINT_MAX to u32 fields")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Cc: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sysctl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index acf0a5a06da7..8cca4c7bb21f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2136,6 +2136,7 @@ static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
 		*valp = *lvalp;
 	} else {
 		unsigned int val = *valp;
+		*negp = false;
 		*lvalp = (unsigned long)val;
 	}
 	return 0;

From 7587a5ae7eef0439f7be31f1b5959af062bbc5ec Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Fri, 7 Apr 2017 11:16:52 -0700
Subject: [PATCH 288/410] blk-mq: Introduce blk_mq_delay_run_hw_queue()

Introduce a function that runs a hardware queue unconditionally
after a delay. Note: there is already a function that stops and
restarts a hardware queue after a delay, namely blk_mq_delay_queue().

This function will be used in the next patch in this series.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Long Li <longli@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c         | 32 ++++++++++++++++++++++++++++++--
 include/linux/blk-mq.h |  2 ++
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index f1be5c3eb600..ad45ae743186 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1135,7 +1135,8 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
 	return hctx->next_cpu;
 }
 
-void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
+static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
+					unsigned long msecs)
 {
 	if (unlikely(blk_mq_hctx_stopped(hctx) ||
 		     !blk_mq_hw_queue_mapped(hctx)))
@@ -1152,7 +1153,24 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 		put_cpu();
 	}
 
-	kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work);
+	if (msecs == 0)
+		kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx),
+					 &hctx->run_work);
+	else
+		kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
+						 &hctx->delayed_run_work,
+						 msecs_to_jiffies(msecs));
+}
+
+void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
+{
+	__blk_mq_delay_run_hw_queue(hctx, true, msecs);
+}
+EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
+
+void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
+{
+	__blk_mq_delay_run_hw_queue(hctx, async, 0);
 }
 
 void blk_mq_run_hw_queues(struct request_queue *q, bool async)
@@ -1255,6 +1273,15 @@ static void blk_mq_run_work_fn(struct work_struct *work)
 	__blk_mq_run_hw_queue(hctx);
 }
 
+static void blk_mq_delayed_run_work_fn(struct work_struct *work)
+{
+	struct blk_mq_hw_ctx *hctx;
+
+	hctx = container_of(work, struct blk_mq_hw_ctx, delayed_run_work.work);
+
+	__blk_mq_run_hw_queue(hctx);
+}
+
 static void blk_mq_delay_work_fn(struct work_struct *work)
 {
 	struct blk_mq_hw_ctx *hctx;
@@ -1962,6 +1989,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 		node = hctx->numa_node = set->numa_node;
 
 	INIT_WORK(&hctx->run_work, blk_mq_run_work_fn);
+	INIT_DELAYED_WORK(&hctx->delayed_run_work, blk_mq_delayed_run_work_fn);
 	INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
 	spin_lock_init(&hctx->lock);
 	INIT_LIST_HEAD(&hctx->dispatch);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index b296a9006117..9382c5da7a2e 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -51,6 +51,7 @@ struct blk_mq_hw_ctx {
 
 	atomic_t		nr_active;
 
+	struct delayed_work	delayed_run_work;
 	struct delayed_work	delay_work;
 
 	struct hlist_node	cpuhp_dead;
@@ -238,6 +239,7 @@ void blk_mq_stop_hw_queues(struct request_queue *q);
 void blk_mq_start_hw_queues(struct request_queue *q);
 void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
+void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 void blk_mq_run_hw_queues(struct request_queue *q, bool async);
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,

From 36e3cf273977da34a760d513e1bef8431a9abaa0 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Fri, 7 Apr 2017 11:16:53 -0700
Subject: [PATCH 289/410] scsi: Avoid that SCSI queues get stuck

If a .queue_rq() function returns BLK_MQ_RQ_QUEUE_BUSY then the block
driver that implements that function is responsible for rerunning the
hardware queue once requests can be queued again successfully.

commit 52d7f1b5c2f3 ("blk-mq: Avoid that requeueing starts stopped
queues") removed the blk_mq_stop_hw_queue() call from scsi_queue_rq()
for the BLK_MQ_RQ_QUEUE_BUSY case. Hence change all calls to functions
that are intended to rerun a busy queue such that these examine all
hardware queues instead of only stopped queues.

Since no other functions than scsi_internal_device_block() and
scsi_internal_device_unblock() should ever stop or restart a SCSI
queue, change the blk_mq_delay_queue() call into a
blk_mq_delay_run_hw_queue() call.

Fixes: commit 52d7f1b5c2f3 ("blk-mq: Avoid that requeueing starts stopped queues")
Fixes: commit 7e79dadce222 ("blk-mq: stop hardware queue in blk_mq_delay_queue()")
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Long Li <longli@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/scsi/scsi_lib.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 19125d72f322..e5a2d590a104 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -496,7 +496,7 @@ static void scsi_run_queue(struct request_queue *q)
 		scsi_starved_list_run(sdev->host);
 
 	if (q->mq_ops)
-		blk_mq_start_stopped_hw_queues(q, false);
+		blk_mq_run_hw_queues(q, false);
 	else
 		blk_run_queue(q);
 }
@@ -667,7 +667,7 @@ static bool scsi_end_request(struct request *req, int error,
 		    !list_empty(&sdev->host->starved_list))
 			kblockd_schedule_work(&sdev->requeue_work);
 		else
-			blk_mq_start_stopped_hw_queues(q, true);
+			blk_mq_run_hw_queues(q, true);
 	} else {
 		unsigned long flags;
 
@@ -1974,7 +1974,7 @@ out:
 	case BLK_MQ_RQ_QUEUE_BUSY:
 		if (atomic_read(&sdev->device_busy) == 0 &&
 		    !scsi_device_blocked(sdev))
-			blk_mq_delay_queue(hctx, SCSI_QUEUE_DELAY);
+			blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
 		break;
 	case BLK_MQ_RQ_QUEUE_ERROR:
 		/*

From 6077c2d706097c00d8f2fed57d3f3c45cd228ee8 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Fri, 7 Apr 2017 11:16:54 -0700
Subject: [PATCH 290/410] dm rq: Avoid that request processing stalls
 sporadically

While running the srp-test software I noticed that request
processing stalls sporadically at the beginning of a test, namely
when mkfs is run against a dm-mpath device. Every time when that
happened the following command was sufficient to resume request
processing:

    echo run >/sys/kernel/debug/block/dm-0/state

This patch avoids that such request processing stalls occur. The
test I ran is as follows:

    while srp-test/run_tests -d -r 30 -t 02-mq; do :; done

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: dm-devel@redhat.com
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/md/dm-rq.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 28955b94d2b2..0b081d170087 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -755,6 +755,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 		/* Undo dm_start_request() before requeuing */
 		rq_end_stats(md, rq);
 		rq_completed(md, rq_data_dir(rq), false);
+		blk_mq_delay_run_hw_queue(hctx, 100/*ms*/);
 		return BLK_MQ_RQ_QUEUE_BUSY;
 	}
 

From 6d8c6c0f97ad8a3517c42b179c1dc8e77397d0e2 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Fri, 7 Apr 2017 12:40:09 -0600
Subject: [PATCH 291/410] blk-mq: Restart a single queue if tag sets are shared

To improve scalability, if hardware queues are shared, restart
a single hardware queue in round-robin fashion. Rename
blk_mq_sched_restart_queues() to reflect the new semantics.
Remove blk_mq_sched_mark_restart_queue() because this function
has no callers. Remove flag QUEUE_FLAG_RESTART because this
patch removes the code that uses this flag.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-sched.c   | 63 +++++++++++++++++++++++++++++++++++-------
 block/blk-mq-sched.h   | 16 +----------
 block/blk-mq.c         |  2 +-
 include/linux/blkdev.h |  1 -
 4 files changed, 55 insertions(+), 27 deletions(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index e8c2ed654ef0..c974a1bbf4cb 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -318,25 +318,68 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
 	return true;
 }
 
-static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
+static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
 {
 	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
 		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-		if (blk_mq_hctx_has_pending(hctx))
+		if (blk_mq_hctx_has_pending(hctx)) {
 			blk_mq_run_hw_queue(hctx, true);
+			return true;
+		}
 	}
+	return false;
 }
 
-void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx)
-{
-	struct request_queue *q = hctx->queue;
-	unsigned int i;
+/**
+ * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
+ * @pos:    loop cursor.
+ * @skip:   the list element that will not be examined. Iteration starts at
+ *          @skip->next.
+ * @head:   head of the list to examine. This list must have at least one
+ *          element, namely @skip.
+ * @member: name of the list_head structure within typeof(*pos).
+ */
+#define list_for_each_entry_rcu_rr(pos, skip, head, member)		\
+	for ((pos) = (skip);						\
+	     (pos = (pos)->member.next != (head) ? list_entry_rcu(	\
+			(pos)->member.next, typeof(*pos), member) :	\
+	      list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \
+	     (pos) != (skip); )
 
-	if (test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) {
-		if (test_and_clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) {
-			queue_for_each_hw_ctx(q, hctx, i)
-				blk_mq_sched_restart_hctx(hctx);
+/*
+ * Called after a driver tag has been freed to check whether a hctx needs to
+ * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
+ * queues in a round-robin fashion if the tag set of @hctx is shared with other
+ * hardware queues.
+ */
+void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
+{
+	struct blk_mq_tags *const tags = hctx->tags;
+	struct blk_mq_tag_set *const set = hctx->queue->tag_set;
+	struct request_queue *const queue = hctx->queue, *q;
+	struct blk_mq_hw_ctx *hctx2;
+	unsigned int i, j;
+
+	if (set->flags & BLK_MQ_F_TAG_SHARED) {
+		rcu_read_lock();
+		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
+					   tag_set_list) {
+			queue_for_each_hw_ctx(q, hctx2, i)
+				if (hctx2->tags == tags &&
+				    blk_mq_sched_restart_hctx(hctx2))
+					goto done;
 		}
+		j = hctx->queue_num + 1;
+		for (i = 0; i < queue->nr_hw_queues; i++, j++) {
+			if (j == queue->nr_hw_queues)
+				j = 0;
+			hctx2 = queue->queue_hw_ctx[j];
+			if (hctx2->tags == tags &&
+			    blk_mq_sched_restart_hctx(hctx2))
+				break;
+		}
+done:
+		rcu_read_unlock();
 	} else {
 		blk_mq_sched_restart_hctx(hctx);
 	}
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index e704956e0862..3a9e6e40558b 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -19,7 +19,7 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
 				struct request **merged_request);
 bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio);
 bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
-void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx);
+void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx);
 
 void blk_mq_sched_insert_request(struct request *rq, bool at_head,
 				 bool run_queue, bool async, bool can_block);
@@ -136,20 +136,6 @@ static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
 		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 }
 
-/*
- * Mark a hardware queue and the request queue it belongs to as needing a
- * restart.
- */
-static inline void blk_mq_sched_mark_restart_queue(struct blk_mq_hw_ctx *hctx)
-{
-	struct request_queue *q = hctx->queue;
-
-	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-	if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
-		set_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
-}
-
 static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
 {
 	return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ad45ae743186..572966f49596 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -348,7 +348,7 @@ void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 		blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
 	if (sched_tag != -1)
 		blk_mq_sched_completed_request(hctx, rq);
-	blk_mq_sched_restart_queues(hctx);
+	blk_mq_sched_restart(hctx);
 	blk_queue_exit(q);
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 5a7da607ca04..7548f332121a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -610,7 +610,6 @@ struct request_queue {
 #define QUEUE_FLAG_FLUSH_NQ    25	/* flush not queueuable */
 #define QUEUE_FLAG_DAX         26	/* device supports DAX */
 #define QUEUE_FLAG_STATS       27	/* track rq completion times */
-#define QUEUE_FLAG_RESTART     28	/* queue needs restart at completion */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\

From cc663f4d4c97b7297fb45135ab23cfd508b35a77 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Fri, 7 Apr 2017 11:42:05 -0700
Subject: [PATCH 292/410] tcp: restrict F-RTO to work-around broken
 middle-boxes

The recent extension of F-RTO 89fe18e44 ("tcp: extend F-RTO
to catch more spurious timeouts") interacts badly with certain
broken middle-boxes.  These broken boxes modify and falsely raise
the receive window on the ACKs. During a timeout induced recovery,
F-RTO would send new data packets to probe if the timeout is false
or not. Since the receive window is falsely raised, the receiver
would silently drop these F-RTO packets. The recovery would take N
(exponentially backoff) timeouts to repair N packet losses.  A TCP
performance killer.

Due to this unfortunate situation, this patch removes this extension
to revert F-RTO back to the RFC specification.

Fixes: 89fe18e44f7e ("tcp: extend F-RTO to catch more spurious timeouts")
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2c1f59386a7b..659d1baefb2b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1935,6 +1935,7 @@ void tcp_enter_loss(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct net *net = sock_net(sk);
 	struct sk_buff *skb;
+	bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
 	bool is_reneg;			/* is receiver reneging on SACKs? */
 	bool mark_lost;
 
@@ -1994,15 +1995,18 @@ void tcp_enter_loss(struct sock *sk)
 	tp->high_seq = tp->snd_nxt;
 	tcp_ecn_queue_cwr(tp);
 
-	/* F-RTO RFC5682 sec 3.1 step 1 mandates to disable F-RTO
-	 * if a previous recovery is underway, otherwise it may incorrectly
-	 * call a timeout spurious if some previously retransmitted packets
-	 * are s/acked (sec 3.2). We do not apply that retriction since
-	 * retransmitted skbs are permanently tagged with TCPCB_EVER_RETRANS
-	 * so FLAG_ORIG_SACK_ACKED is always correct. But we do disable F-RTO
-	 * on PTMU discovery to avoid sending new data.
+	/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
+	 * loss recovery is underway except recurring timeout(s) on
+	 * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
+	 *
+	 * In theory F-RTO can be used repeatedly during loss recovery.
+	 * In practice this interacts badly with broken middle-boxes that
+	 * falsely raise the receive window, which results in repeated
+	 * timeouts and stop-and-go behavior.
 	 */
-	tp->frto = sysctl_tcp_frto && !inet_csk(sk)->icsk_mtup.probe_size;
+	tp->frto = sysctl_tcp_frto &&
+		   (new_recovery || icsk->icsk_retransmits) &&
+		   !inet_csk(sk)->icsk_mtup.probe_size;
 }
 
 /* If ACK arrived pointing to a remembered SACK, it means that our

From cefdc26e86728812aea54248a534fd4a5da2a43d Mon Sep 17 00:00:00 2001
From: Martin Brandenburg <martin@omnibond.com>
Date: Thu, 6 Apr 2017 18:11:00 -0400
Subject: [PATCH 293/410] orangefs: move features validation to fix filesystem
 hang

Without this fix (and another to the userspace component itself
described later), the kernel will be unable to process any OrangeFS
requests after the userspace component is restarted (due to a crash or
at the administrator's behest).

The bug here is that inside orangefs_remount, the orangefs_request_mutex
is locked.  When the userspace component restarts while the filesystem
is mounted, it sends a ORANGEFS_DEV_REMOUNT_ALL ioctl to the device,
which causes the kernel to send it a few requests aimed at synchronizing
the state between the two.  While this is happening the
orangefs_request_mutex is locked to prevent any other requests going
through.

This is only half of the bugfix.  The other half is in the userspace
component which outright ignores(!) requests made before it considers
the filesystem remounted, which is after the ioctl returns.  Of course
the ioctl doesn't return until after the userspace component responds to
the request it ignores.  The userspace component has been changed to
allow ORANGEFS_VFS_OP_FEATURES regardless of the mount status.

Mike Marshall says:
 "I've tested this patch against the fixed userspace part. This patch is
  real important, I hope it can make it into 4.11...

  Here's what happens when the userspace daemon is restarted, without
  the patch:

    =============================================
    [ INFO: possible recursive locking detected ]
    [   4.10.0-00007-ge98bdb3 #1 Not tainted    ]
    ---------------------------------------------
    pvfs2-client-co/29032 is trying to acquire lock:
     (orangefs_request_mutex){+.+.+.}, at: service_operation+0x3c7/0x7b0 [orangefs]
                  but task is already holding lock:
     (orangefs_request_mutex){+.+.+.}, at: dispatch_ioctl_command+0x1bf/0x330 [orangefs]

    CPU: 0 PID: 29032 Comm: pvfs2-client-co Not tainted 4.10.0-00007-ge98bdb3 #1
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-1.fc25 04/01/2014
    Call Trace:
     __lock_acquire+0x7eb/0x1290
     lock_acquire+0xe8/0x1d0
     mutex_lock_killable_nested+0x6f/0x6e0
     service_operation+0x3c7/0x7b0 [orangefs]
     orangefs_remount+0xea/0x150 [orangefs]
     dispatch_ioctl_command+0x227/0x330 [orangefs]
     orangefs_devreq_ioctl+0x29/0x70 [orangefs]
     do_vfs_ioctl+0xa3/0x6e0
     SyS_ioctl+0x79/0x90"

Signed-off-by: Martin Brandenburg <martin@omnibond.com>
Acked-by: Mike Marshall <hubcap@omnibond.com>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/orangefs/super.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index 67c24351a67f..cd261c8de53a 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -263,8 +263,13 @@ int orangefs_remount(struct orangefs_sb_info_s *orangefs_sb)
 		if (!new_op)
 			return -ENOMEM;
 		new_op->upcall.req.features.features = 0;
-		ret = service_operation(new_op, "orangefs_features", 0);
-		orangefs_features = new_op->downcall.resp.features.features;
+		ret = service_operation(new_op, "orangefs_features",
+		    ORANGEFS_OP_PRIORITY | ORANGEFS_OP_NO_MUTEX);
+		if (!ret)
+			orangefs_features =
+			    new_op->downcall.resp.features.features;
+		else
+			orangefs_features = 0;
 		op_release(new_op);
 	} else {
 		orangefs_features = 0;

From 6780414519f91c2a84da9baa963a940ac916f803 Mon Sep 17 00:00:00 2001
From: Fam Zheng <famz@redhat.com>
Date: Tue, 28 Mar 2017 12:41:26 +0800
Subject: [PATCH 294/410] scsi: sd: Consider max_xfer_blocks if opt_xfer_blocks
 is unusable

If device reports a small max_xfer_blocks and a zero opt_xfer_blocks, we
end up using BLK_DEF_MAX_SECTORS, which is wrong and r/w of that size
may get error.

[mkp: tweaked to avoid setting rw_max twice and added typecast]

Cc: <stable@vger.kernel.org> # v4.4+
Fixes: ca369d51b3e ("block/sd: Fix device-imposed transfer length limits")
Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index fb9b4d29af0b..906cd6bfa2a9 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2956,7 +2956,8 @@ static int sd_revalidate_disk(struct gendisk *disk)
 		q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
 		rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
 	} else
-		rw_max = BLK_DEF_MAX_SECTORS;
+		rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
+				      (sector_t)BLK_DEF_MAX_SECTORS);
 
 	/* Combine with controller limits */
 	q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q));

From a00a7862513089f17209b732f230922f1942e0b9 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 17 Mar 2017 08:47:14 -0400
Subject: [PATCH 295/410] scsi: sr: Sanity check returned mode data

Kefeng Wang discovered that old versions of the QEMU CD driver would
return mangled mode data causing us to walk off the end of the buffer in
an attempt to parse it. Sanity check the returned mode sense data.

Cc: <stable@vger.kernel.org>
Reported-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Tested-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sr.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 0b29b9329b1c..a8f630213a1a 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -836,6 +836,7 @@ static void get_capabilities(struct scsi_cd *cd)
 	unsigned char *buffer;
 	struct scsi_mode_data data;
 	struct scsi_sense_hdr sshdr;
+	unsigned int ms_len = 128;
 	int rc, n;
 
 	static const char *loadmech[] =
@@ -862,10 +863,11 @@ static void get_capabilities(struct scsi_cd *cd)
 	scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr);
 
 	/* ask for mode page 0x2a */
-	rc = scsi_mode_sense(cd->device, 0, 0x2a, buffer, 128,
+	rc = scsi_mode_sense(cd->device, 0, 0x2a, buffer, ms_len,
 			     SR_TIMEOUT, 3, &data, NULL);
 
-	if (!scsi_status_is_good(rc)) {
+	if (!scsi_status_is_good(rc) || data.length > ms_len ||
+	    data.header_length + data.block_descriptor_length > data.length) {
 		/* failed, drive doesn't have capabilities mode page */
 		cd->cdi.speed = 1;
 		cd->cdi.mask |= (CDC_CD_R | CDC_CD_RW | CDC_DVD_R |

From 8eaf7dfcfcf222e56f7d1e0a9ffdd7be0f300c2f Mon Sep 17 00:00:00 2001
From: Chad Dupuis <chad.dupuis@cavium.com>
Date: Thu, 23 Mar 2017 06:58:47 -0700
Subject: [PATCH 296/410] scsi: qedf: Fix crash due to unsolicited FIP VLAN
 response.

We need to initialize qedf->fipvlan_compl in __qedf_probe so that if we
receive an unsolicited FIP VLAN response, the system doesn't crash due
to trying to complete an uninitialized completion.

Also add a check to see if there are any waiters on the completion so we
don't inadvertantly kick start the discovery process due to the
unsolicited frame.

Fixed the crash:

<1>BUG: unable to handle kernel NULL pointer dereference at (null)
<1>IP: [<ffffffff8105ed71>] __wake_up_common+0x31/0x90
<4>PGD 0
<4>Oops: 0000 [#1] SMP
<4>last sysfs file: /sys/devices/system/cpu/online
<4>CPU 7
<4>Modules linked in: autofs4 nfs lockd fscache auth_rpcgss nfs_acl sunrpc target_core_iblock target_core_file target_core_pscsi target_core_mod configfs bnx2fc cnic fcoe 8021q garp stp llc ipt_REJECT nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables ipv6 vfat fat uinput ipmi_devintf microcode power_meter acpi_ipmi ipmi_si ipmi_msghandler iTCO_wdt iTCO_vendor_support dcdbas sg joydev sb_edac edac_core lpc_ich mfd_core shpchp tg3 ptp pps_core ext4 jbd2 mbcache sr_mod cdrom sd_mod crc_t10dif qedi(U) iscsi_boot_sysfs libiscsi scsi_transport_iscsi uio qedf(U) libfcoe libfc scsi_transport_fc scsi_tgt qede(U) qed(U) ahci megaraid_sas wmi dm_mirror dm_region_hash dm_log dm_mod [last unloaded: speedstep_lib]
<4>
<4>Pid: 1485, comm: qedf_11_ll2 Not tainted 2.6.32-642.el6.x86_64 #1 Dell Inc. PowerEdge R730/0599V5
<4>RIP: 0010:[<ffffffff8105ed71>]  [<ffffffff8105ed71>] __wake_up_common+0x31/0x90
<4>RSP: 0018:ffff881068a83d50  EFLAGS: 00010086
<4>RAX: ffffffffffffffe8 RBX: ffff88106bf42de0 RCX: 0000000000000000
<4>RDX: 0000000000000000 RSI: 0000000000000003 RDI: ffff88106bf42de0
<4>RBP: ffff881068a83d90 R08: 0000000000000000 R09: 00000000fffffffe
<4>R10: 0000000000000000 R11: 000000000000000b R12: 0000000000000286
<4>R13: ffff88106bf42de8 R14: 0000000000000000 R15: 0000000000000000
<4>FS:  0000000000000000(0000) GS:ffff88089c460000(0000) knlGS:0000000000000000
<4>CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
<4>CR2: 0000000000000000 CR3: 0000000001a8d000 CR4: 00000000001407e0
<4>DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
<4>DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
<4>Process qedf_11_ll2 (pid: 1485, threadinfo ffff881068a80000, task ffff881068a70040)
<4>Stack:
<4> ffff88106ef00090 0000000300000001 ffff881068a83d90 ffff88106bf42de0
<4><d> 0000000000000286 ffff88106bf42dd8 ffff88106bf40a50 0000000000000002
<4><d> ffff881068a83dc0 ffffffff810634c7 ffff881000000003 000000000000000b
<4>Call Trace:
<4> [<ffffffff810634c7>] complete+0x47/0x60
<4> [<ffffffffa01d37e7>] qedf_fip_recv+0x1c7/0x450 [qedf]
<4> [<ffffffffa01cb3cb>] qedf_ll2_recv_thread+0x33b/0x510 [qedf]
<4> [<ffffffffa01cb090>] ? qedf_ll2_recv_thread+0x0/0x510 [qedf]
<4> [<ffffffff810a662e>] kthread+0x9e/0xc0
<4> [<ffffffff8100c28a>] child_rip+0xa/0x20
<4> [<ffffffff810a6590>] ? kthread+0x0/0xc0
<4> [<ffffffff8100c280>] ? child_rip+0x0/0x20
<4>Code: 41 56 41 55 41 54 53 48 83 ec 18 0f 1f 44 00 00 89 75 cc 89 55 c8 4c 8d 6f 08 48 8b 57 08 41 89 cf 4d 89 c6 48 8d 42 e8 49 39 d5 <48> 8b 58 18 74 3f 48 83 eb 18 eb 0a 0f 1f 00 48 89 d8 48 8d 5a
<1>RIP  [<ffffffff8105ed71>] __wake_up_common+0x31/0x90
<4> RSP <ffff881068a83d50>
<4>CR2: 0000000000000000

Signed-off-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedf/qedf_fip.c  | 3 ++-
 drivers/scsi/qedf/qedf_main.c | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/qedf/qedf_fip.c b/drivers/scsi/qedf/qedf_fip.c
index ed58b9104f58..e10b91cc3c62 100644
--- a/drivers/scsi/qedf/qedf_fip.c
+++ b/drivers/scsi/qedf/qedf_fip.c
@@ -99,7 +99,8 @@ static void qedf_fcoe_process_vlan_resp(struct qedf_ctx *qedf,
 		qedf_set_vlan_id(qedf, vid);
 
 		/* Inform waiter that it's ok to call fcoe_ctlr_link up() */
-		complete(&qedf->fipvlan_compl);
+		if (!completion_done(&qedf->fipvlan_compl))
+			complete(&qedf->fipvlan_compl);
 	}
 }
 
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index 8e2a160490e6..cceddd995a4b 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -2803,6 +2803,7 @@ static int __qedf_probe(struct pci_dev *pdev, int mode)
 		atomic_set(&qedf->num_offloads, 0);
 		qedf->stop_io_on_error = false;
 		pci_set_drvdata(pdev, qedf);
+		init_completion(&qedf->fipvlan_compl);
 
 		QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_INFO,
 		   "QLogic FastLinQ FCoE Module qedf %s, "

From bf6061b17a8d47ef0d9344d3ef576a4ff0edf793 Mon Sep 17 00:00:00 2001
From: Sawan Chandak <sawan.chandak@cavium.com>
Date: Fri, 31 Mar 2017 14:37:03 -0700
Subject: [PATCH 297/410] scsi: qla2xxx: Add fix to read correct register value
 for ISP82xx.

Add fix to read correct register value for ISP82xx, during check for
register disconnect.ISP82xx has different base register.

Fixes: a465537ad1a4 ("qla2xxx: Disable the adapter and skip error recovery in case of register disconnect")
Signed-off-by: Sawan Chandak <sawan.chandak@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_os.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 579363a6f44f..c9e45d2befe5 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1126,8 +1126,13 @@ static inline
 uint32_t qla2x00_isp_reg_stat(struct qla_hw_data *ha)
 {
 	struct device_reg_24xx __iomem *reg = &ha->iobase->isp24;
+	struct device_reg_82xx __iomem *reg82 = &ha->iobase->isp82;
 
-	return ((RD_REG_DWORD(&reg->host_status)) == ISP_REG_DISCONNECT);
+	if (IS_P3P_TYPE(ha))
+		return ((RD_REG_DWORD(&reg82->host_int)) == ISP_REG_DISCONNECT);
+	else
+		return ((RD_REG_DWORD(&reg->host_status)) ==
+			ISP_REG_DISCONNECT);
 }
 
 /**************************************************************************

From 7c856152cb92f8eee2df29ef325a1b1f43161aff Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Tue, 4 Apr 2017 10:42:30 -0400
Subject: [PATCH 298/410] scsi: sd: Fix capacity calculation with 32-bit
 sector_t

We previously made sure that the reported disk capacity was less than
0xffffffff blocks when the kernel was not compiled with large sector_t
support (CONFIG_LBDAF). However, this check assumed that the capacity
was reported in units of 512 bytes.

Add a sanity check function to ensure that we only enable disks if the
entire reported capacity can be expressed in terms of sector_t.

Cc: <stable@vger.kernel.org>
Reported-by: Steve Magnani <steve.magnani@digidescorp.com>
Cc: Bart Van Assche <Bart.VanAssche@sandisk.com>
Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sd.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 906cd6bfa2a9..fe0f7997074e 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2102,6 +2102,22 @@ static void read_capacity_error(struct scsi_disk *sdkp, struct scsi_device *sdp,
 
 #define READ_CAPACITY_RETRIES_ON_RESET	10
 
+/*
+ * Ensure that we don't overflow sector_t when CONFIG_LBDAF is not set
+ * and the reported logical block size is bigger than 512 bytes. Note
+ * that last_sector is a u64 and therefore logical_to_sectors() is not
+ * applicable.
+ */
+static bool sd_addressable_capacity(u64 lba, unsigned int sector_size)
+{
+	u64 last_sector = (lba + 1ULL) << (ilog2(sector_size) - 9);
+
+	if (sizeof(sector_t) == 4 && last_sector > U32_MAX)
+		return false;
+
+	return true;
+}
+
 static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
 						unsigned char *buffer)
 {
@@ -2167,7 +2183,7 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
 		return -ENODEV;
 	}
 
-	if ((sizeof(sdkp->capacity) == 4) && (lba >= 0xffffffffULL)) {
+	if (!sd_addressable_capacity(lba, sector_size)) {
 		sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a "
 			"kernel compiled with support for large block "
 			"devices.\n");
@@ -2256,7 +2272,7 @@ static int read_capacity_10(struct scsi_disk *sdkp, struct scsi_device *sdp,
 		return sector_size;
 	}
 
-	if ((sizeof(sdkp->capacity) == 4) && (lba == 0xffffffff)) {
+	if (!sd_addressable_capacity(lba, sector_size)) {
 		sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a "
 			"kernel compiled with support for large block "
 			"devices.\n");

From d75450ff40df0199bf13dfb19f435519ff947138 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 7 Apr 2017 16:04:39 -0700
Subject: [PATCH 299/410] mm: fix page_vma_mapped_walk() for ksm pages

Doug Smythies reports oops with KSM in this backtrace, I've been seeing
the same:

  page_vma_mapped_walk+0xe6/0x5b0
  page_referenced_one+0x91/0x1a0
  rmap_walk_ksm+0x100/0x190
  rmap_walk+0x4f/0x60
  page_referenced+0x149/0x170
  shrink_active_list+0x1c2/0x430
  shrink_node_memcg+0x67a/0x7a0
  shrink_node+0xe1/0x320
  kswapd+0x34b/0x720

Just as observed in commit 4b0ece6fa016 ("mm: migrate: fix
remove_migration_pte() for ksm pages"), you cannot use page->index
calculations on ksm pages.

page_vma_mapped_walk() is relying on __vma_address(), where a ksm page
can lead it off the end of the page table, and into whatever nonsense is
in the next page, ending as an oops inside check_pte()'s pte_page().

KSM tells page_vma_mapped_walk() exactly where to look for the page, it
does not need any page->index calculation: and that's so also for all
the normal and file and anon pages - just not for THPs and their
subpages.  Get out early in most cases: instead of a PageKsm test, move
down the earlier not-THP-page test, as suggested by Kirill.

I'm also slightly worried that this loop can stray into other vmas, so
added a vm_end test to prevent surprises; though I have not imagined
anything worse than a very contrived case, in which a page mlocked in
the next vma might be reclaimed because it is not mlocked in this vma.

Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()")
Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1704031104400.1118@eggly.anvils
Signed-off-by: Hugh Dickins <hughd@google.com>
Reported-by: Doug Smythies <dsmythies@telus.net>
Tested-by: Doug Smythies <dsmythies@telus.net>
Reviewed-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_vma_mapped.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index c4c9def8ffea..de9c40d7304a 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -111,12 +111,8 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	if (pvmw->pmd && !pvmw->pte)
 		return not_found(pvmw);
 
-	/* Only for THP, seek to next pte entry makes sense */
-	if (pvmw->pte) {
-		if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
-			return not_found(pvmw);
+	if (pvmw->pte)
 		goto next_pte;
-	}
 
 	if (unlikely(PageHuge(pvmw->page))) {
 		/* when pud is not present, pte will be NULL */
@@ -165,9 +161,14 @@ restart:
 	while (1) {
 		if (check_pte(pvmw))
 			return true;
-next_pte:	do {
+next_pte:
+		/* Seek to next pte only makes sense for THP */
+		if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
+			return not_found(pvmw);
+		do {
 			pvmw->address += PAGE_SIZE;
-			if (pvmw->address >=
+			if (pvmw->address >= pvmw->vma->vm_end ||
+			    pvmw->address >=
 					__vma_address(pvmw->page, pvmw->vma) +
 					hpage_nr_pages(pvmw->page) * PAGE_SIZE)
 				return not_found(pvmw);

From 045098e944959d4cbd56bbf33e2f26045863b7ca Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Fri, 7 Apr 2017 16:04:42 -0700
Subject: [PATCH 300/410] userfaultfd: report actual registered features in
 fdinfo

fdinfo for userfault file descriptor reports UFFD_API_FEATURES.  Up
until recently, the UFFD_API_FEATURES was defined as 0, therefore
corresponding field in fdinfo always contained zero.  Now, with
introduction of several additional features, UFFD_API_FEATURES is not
longer 0 and it seems better to report actual features requested for the
userfaultfd object described by the fdinfo.

First, the applications that were using userfault will still see zero at
the features field in fdinfo.  Next, reporting actual features rather
than available features, gives clear indication of what userfault
features are used by an application.

Link: http://lkml.kernel.org/r/1491140181-22121-1-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Pavel Emelyanov <xemul@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/userfaultfd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 1d227b0fcf49..f7555fc25877 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1756,7 +1756,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)
 	 *	protocols: aa:... bb:...
 	 */
 	seq_printf(m, "pending:\t%lu\ntotal:\t%lu\nAPI:\t%Lx:%x:%Lx\n",
-		   pending, total, UFFD_API, UFFD_API_FEATURES,
+		   pending, total, UFFD_API, ctx->features,
 		   UFFD_API_IOCTLS|UFFD_API_RANGE_IOCTLS);
 }
 #endif

From 1f06b81aea5ecba2c1f8afd87e0ba1b9f8f90160 Mon Sep 17 00:00:00 2001
From: Alexander Polakov <apolyakov@beget.ru>
Date: Fri, 7 Apr 2017 16:04:45 -0700
Subject: [PATCH 301/410] mm/page_alloc.c: fix print order in show_free_areas()

Fixes: 11fb998986a72a ("mm: move most file-based accounting to the node")
Link: http://lkml.kernel.org/r/1490377730.30219.2.camel@beget.ru
Signed-off-by: Alexander Polyakov <apolyakov@beget.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>	[4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6cbde310abed..d6a665057d61 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4519,13 +4519,13 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
 			K(node_page_state(pgdat, NR_FILE_MAPPED)),
 			K(node_page_state(pgdat, NR_FILE_DIRTY)),
 			K(node_page_state(pgdat, NR_WRITEBACK)),
+			K(node_page_state(pgdat, NR_SHMEM)),
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 			K(node_page_state(pgdat, NR_SHMEM_THPS) * HPAGE_PMD_NR),
 			K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)
 					* HPAGE_PMD_NR),
 			K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
 #endif
-			K(node_page_state(pgdat, NR_SHMEM)),
 			K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
 			K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
 			node_page_state(pgdat, NR_PAGES_SCANNED),

From d79bf21e0e02f8ad24219f0587cc599a7e67f6c6 Mon Sep 17 00:00:00 2001
From: Jessica Yu <jeyu@redhat.com>
Date: Fri, 7 Apr 2017 16:04:48 -0700
Subject: [PATCH 302/410] vmlinux.lds: add missing VMLINUX_SYMBOL macros

When __{start,end}_ro_after_init is referenced from C code, we run into
the following build errors on blackfin:

  kernel/extable.c:169: undefined reference to `__start_ro_after_init'
  kernel/extable.c:169: undefined reference to `__end_ro_after_init'

The build error is due to the fact that blackfin is one of the few
arches that prepends an underscore '_' to all symbols defined in C.

Fix this by wrapping __{start,end}_ro_after_init in vmlinux.lds.h with
VMLINUX_SYMBOL(), which adds the necessary prefix for arches that have
HAVE_UNDERSCORE_SYMBOL_PREFIX.

Link: http://lkml.kernel.org/r/1491259387-15869-1-git-send-email-jeyu@redhat.com
Signed-off-by: Jessica Yu <jeyu@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Eddie Kovsky <ewk@edkovsky.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/vmlinux.lds.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 7cdfe167074f..143db9c523e2 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -261,9 +261,9 @@
  */
 #ifndef RO_AFTER_INIT_DATA
 #define RO_AFTER_INIT_DATA						\
-	__start_ro_after_init = .;					\
+	VMLINUX_SYMBOL(__start_ro_after_init) = .;			\
 	*(.data..ro_after_init)						\
-	__end_ro_after_init = .;
+	VMLINUX_SYMBOL(__end_ro_after_init) = .;
 #endif
 
 /*

From 5402e97af667e35e54177af8f6575518bf251d51 Mon Sep 17 00:00:00 2001
From: "bsegall@google.com" <bsegall@google.com>
Date: Fri, 7 Apr 2017 16:04:51 -0700
Subject: [PATCH 303/410] ptrace: fix PTRACE_LISTEN race corrupting task->state

In PT_SEIZED + LISTEN mode STOP/CONT signals cause a wakeup against
__TASK_TRACED.  If this races with the ptrace_unfreeze_traced at the end
of a PTRACE_LISTEN, this can wake the task /after/ the check against
__TASK_TRACED, but before the reset of state to TASK_TRACED.  This
causes it to instead clobber TASK_WAKING, allowing a subsequent wakeup
against TRACED while the task is still on the rq wake_list, corrupting
it.

Oleg said:
 "The kernel can crash or this can lead to other hard-to-debug problems.
  In short, "task->state = TASK_TRACED" in ptrace_unfreeze_traced()
  assumes that nobody else can wake it up, but PTRACE_LISTEN breaks the
  contract. Obviusly it is very wrong to manipulate task->state if this
  task is already running, or WAKING, or it sleeps again"

[akpm@linux-foundation.org: coding-style fixes]
Fixes: 9899d11f ("ptrace: ensure arch_ptrace/ptrace_request can never race with SIGKILL")
Link: http://lkml.kernel.org/r/xm26y3vfhmkp.fsf_-_@bsegall-linux.mtv.corp.google.com
Signed-off-by: Ben Segall <bsegall@google.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/ptrace.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 0af928712174..266ddcc1d8bb 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -184,11 +184,17 @@ static void ptrace_unfreeze_traced(struct task_struct *task)
 
 	WARN_ON(!task->ptrace || task->parent != current);
 
+	/*
+	 * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely.
+	 * Recheck state under the lock to close this race.
+	 */
 	spin_lock_irq(&task->sighand->siglock);
-	if (__fatal_signal_pending(task))
-		wake_up_state(task, __TASK_TRACED);
-	else
-		task->state = TASK_TRACED;
+	if (task->state == __TASK_TRACED) {
+		if (__fatal_signal_pending(task))
+			wake_up_state(task, __TASK_TRACED);
+		else
+			task->state = TASK_TRACED;
+	}
 	spin_unlock_irq(&task->sighand->siglock);
 }
 

From 4fad7fb6b0279a85e16e6a63e0f1f7d98cedddf1 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Fri, 7 Apr 2017 16:04:54 -0700
Subject: [PATCH 304/410] mm, thp: fix setting of defer+madvise thp defrag mode

Setting thp defrag mode of "defer+madvise" actually sets "defer" in the
kernel due to the name similarity and the out-of-order way the string is
checked in defrag_store().

Check the string in the correct order so that
TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG is set appropriately for
"defer+madvise".

Fixes: 21440d7eb904 ("mm, thp: add new defer+madvise defrag option")
Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1704051814420.137626@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/huge_memory.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1ebc93e179f3..fef4cf210cc7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -240,18 +240,18 @@ static ssize_t defrag_store(struct kobject *kobj,
 		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
 		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
 		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
-	} else if (!memcmp("defer", buf,
-		    min(sizeof("defer")-1, count))) {
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
-		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
-		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
 	} else if (!memcmp("defer+madvise", buf,
 		    min(sizeof("defer+madvise")-1, count))) {
 		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
 		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
 		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
 		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
+	} else if (!memcmp("defer", buf,
+		    min(sizeof("defer")-1, count))) {
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
+		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
+		set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
 	} else if (!memcmp("madvise", buf,
 			   min(sizeof("madvise")-1, count))) {
 		clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);

From e11f8b7b6c4ea13bf8af6b8f42b45e15b554a92b Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Fri, 7 Apr 2017 16:04:57 -0700
Subject: [PATCH 305/410] dax: fix radix tree insertion race

While running generic/340 in my test setup I hit the following race.  It
can happen with kernels that support FS DAX PMDs, so v4.10 thru
v4.11-rc5.

Thread 1				Thread 2
--------				--------
dax_iomap_pmd_fault()
  grab_mapping_entry()
    spin_lock_irq()
    get_unlocked_mapping_entry()
    'entry' is NULL, can't call lock_slot()
    spin_unlock_irq()
    radix_tree_preload()
					dax_iomap_pmd_fault()
					  grab_mapping_entry()
					    spin_lock_irq()
					    get_unlocked_mapping_entry()
					    ...
					    lock_slot()
					    spin_unlock_irq()
					  dax_pmd_insert_mapping()
					    <inserts a PMD mapping>
    spin_lock_irq()
    __radix_tree_insert() fails with -EEXIST
    <fall back to 4k fault, and die horribly
     when inserting a 4k entry where a PMD exists>

The issue is that we have to drop mapping->tree_lock while calling
radix_tree_preload(), but since we didn't have a radix tree entry to
lock (unlike in the pmd_downgrade case) we have no protection against
Thread 2 coming along and inserting a PMD at the same index.  For 4k
entries we handled this with a special-case response to -EEXIST coming
from the __radix_tree_insert(), but this doesn't save us for PMDs
because the -EEXIST case can also mean that we collided with a 4k entry
in the radix tree at a different index, but one that is covered by our
PMD range.

So, correctly handle both the 4k and 2M collision cases by explicitly
re-checking the radix tree for an entry at our index once we reacquire
mapping->tree_lock.

This patch has made it through a clean xfstests run with the current
v4.11-rc5 based linux/master, and it also ran generic/340 500 times in a
loop.  It used to fail within the first 10 iterations.

Link: http://lkml.kernel.org/r/20170406212944.2866-1-ross.zwisler@linux.intel.com
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: "Darrick J. Wong" <darrick.wong@oracle.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: <stable@vger.kernel.org>    [4.10+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/dax.c | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index de622d4282a6..85abd741253d 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -373,6 +373,22 @@ restart:
 		}
 		spin_lock_irq(&mapping->tree_lock);
 
+		if (!entry) {
+			/*
+			 * We needed to drop the page_tree lock while calling
+			 * radix_tree_preload() and we didn't have an entry to
+			 * lock.  See if another thread inserted an entry at
+			 * our index during this time.
+			 */
+			entry = __radix_tree_lookup(&mapping->page_tree, index,
+					NULL, &slot);
+			if (entry) {
+				radix_tree_preload_end();
+				spin_unlock_irq(&mapping->tree_lock);
+				goto restart;
+			}
+		}
+
 		if (pmd_downgrade) {
 			radix_tree_delete(&mapping->page_tree, index);
 			mapping->nrexceptional--;
@@ -388,19 +404,12 @@ restart:
 		if (err) {
 			spin_unlock_irq(&mapping->tree_lock);
 			/*
-			 * Someone already created the entry?  This is a
-			 * normal failure when inserting PMDs in a range
-			 * that already contains PTEs.  In that case we want
-			 * to return -EEXIST immediately.
-			 */
-			if (err == -EEXIST && !(size_flag & RADIX_DAX_PMD))
-				goto restart;
-			/*
-			 * Our insertion of a DAX PMD entry failed, most
-			 * likely because it collided with a PTE sized entry
-			 * at a different index in the PMD range.  We haven't
-			 * inserted anything into the radix tree and have no
-			 * waiters to wake.
+			 * Our insertion of a DAX entry failed, most likely
+			 * because we were inserting a PMD entry and it
+			 * collided with a PTE sized entry at a different
+			 * index in the PMD range.  We haven't inserted
+			 * anything into the radix tree and have no waiters to
+			 * wake.
 			 */
 			return ERR_PTR(err);
 		}

From 460bcec84e11c75122ace5976214abbc596eb91b Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Fri, 7 Apr 2017 16:05:00 -0700
Subject: [PATCH 306/410] mm, swap_cgroup: reschedule when neeed in
 swap_cgroup_swapoff()

We got need_resched() warnings in swap_cgroup_swapoff() because
swap_cgroup_ctrl[type].length is particularly large.

Reschedule when needed.

Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1704061315270.80559@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/swap_cgroup.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
index 310ac0b8f974..ac6318a064d3 100644
--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c
@@ -201,6 +201,8 @@ void swap_cgroup_swapoff(int type)
 			struct page *page = map[i];
 			if (page)
 				__free_page(page);
+			if (!(i % SWAP_CLUSTER_MAX))
+				cond_resched();
 		}
 		vfree(map);
 	}

From cdcf4330d5660998d06fcd899b443693ab3d652f Mon Sep 17 00:00:00 2001
From: Jeffy Chen <jeffy.chen@rock-chips.com>
Date: Fri, 7 Apr 2017 16:05:02 -0700
Subject: [PATCH 307/410] mailmap: update Yakir Yang email address

Set current email address to replace previous employers email addresses.

Link: http://lkml.kernel.org/r/1491450722-6633-1-git-send-email-jeffy.chen@rock-chips.com
Signed-off-by: Jeffy Chen <jeffy.chen@rock-chips.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .mailmap | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.mailmap b/.mailmap
index 67dc22ffc9a8..e229922dc7f0 100644
--- a/.mailmap
+++ b/.mailmap
@@ -171,6 +171,7 @@ Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com>
 Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@virtuozzo.com>
 Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com>
 Takashi YOSHII <takashi.yoshii.zj@renesas.com>
+Yakir Yang <kuankuan.y@gmail.com> <ykk@rock-chips.com>
 Yusuke Goda <goda.yusuke@renesas.com>
 Gustavo Padovan <gustavo@las.ic.unicamp.br>
 Gustavo Padovan <padovan@profusion.mobi>

From ce612879ddc78ea7e4de4be80cba4ebf9caa07ee Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 7 Apr 2017 16:05:05 -0700
Subject: [PATCH 308/410] mm: move pcp and lru-pcp draining into single wq

We currently have 2 specific WQ_RECLAIM workqueues in the mm code.
vmstat_wq for updating pcp stats and lru_add_drain_wq dedicated to drain
per cpu lru caches.  This seems more than necessary because both can run
on a single WQ.  Both do not block on locks requiring a memory
allocation nor perform any allocations themselves.  We will save one
rescuer thread this way.

On the other hand drain_all_pages() queues work on the system wq which
doesn't have rescuer and so this depend on memory allocation (when all
workers are stuck allocating and new ones cannot be created).

Initially we thought this would be more of a theoretical problem but
Hugh Dickins has reported:

: 4.11-rc has been giving me hangs after hours of swapping load.  At
: first they looked like memory leaks ("fork: Cannot allocate memory");
: but for no good reason I happened to do "cat /proc/sys/vm/stat_refresh"
: before looking at /proc/meminfo one time, and the stat_refresh stuck
: in D state, waiting for completion of flush_work like many kworkers.
: kthreadd waiting for completion of flush_work in drain_all_pages().

This worker should be using WQ_RECLAIM as well in order to guarantee a
forward progress.  We can reuse the same one as for lru draining and
vmstat.

Link: http://lkml.kernel.org/r/20170307131751.24936-1-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Suggested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Mel Gorman <mgorman@suse.de>
Tested-by: Yang Li <pku.leo@gmail.com>
Tested-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/internal.h   |  7 +++++++
 mm/page_alloc.c |  9 ++++++++-
 mm/swap.c       | 27 ++++++++-------------------
 mm/vmstat.c     | 15 +++++++++------
 4 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index ccfc2a2969f4..266efaeaa370 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -481,6 +481,13 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 enum ttu_flags;
 struct tlbflush_unmap_batch;
 
+
+/*
+ * only for MM internal work items which do not depend on
+ * any allocations or locks which might depend on allocations
+ */
+extern struct workqueue_struct *mm_percpu_wq;
+
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 void try_to_unmap_flush(void);
 void try_to_unmap_flush_dirty(void);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d6a665057d61..f3d603cef2c0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2373,6 +2373,13 @@ void drain_all_pages(struct zone *zone)
 	 */
 	static cpumask_t cpus_with_pcps;
 
+	/*
+	 * Make sure nobody triggers this path before mm_percpu_wq is fully
+	 * initialized.
+	 */
+	if (WARN_ON_ONCE(!mm_percpu_wq))
+		return;
+
 	/* Workqueues cannot recurse */
 	if (current->flags & PF_WQ_WORKER)
 		return;
@@ -2422,7 +2429,7 @@ void drain_all_pages(struct zone *zone)
 	for_each_cpu(cpu, &cpus_with_pcps) {
 		struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
 		INIT_WORK(work, drain_local_pages_wq);
-		schedule_work_on(cpu, work);
+		queue_work_on(cpu, mm_percpu_wq, work);
 	}
 	for_each_cpu(cpu, &cpus_with_pcps)
 		flush_work(per_cpu_ptr(&pcpu_drain, cpu));
diff --git a/mm/swap.c b/mm/swap.c
index c4910f14f957..5dabf444d724 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -670,30 +670,19 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
 
 static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
 
-/*
- * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM
- * workqueue, aiding in getting memory freed.
- */
-static struct workqueue_struct *lru_add_drain_wq;
-
-static int __init lru_init(void)
-{
-	lru_add_drain_wq = alloc_workqueue("lru-add-drain", WQ_MEM_RECLAIM, 0);
-
-	if (WARN(!lru_add_drain_wq,
-		"Failed to create workqueue lru_add_drain_wq"))
-		return -ENOMEM;
-
-	return 0;
-}
-early_initcall(lru_init);
-
 void lru_add_drain_all(void)
 {
 	static DEFINE_MUTEX(lock);
 	static struct cpumask has_work;
 	int cpu;
 
+	/*
+	 * Make sure nobody triggers this path before mm_percpu_wq is fully
+	 * initialized.
+	 */
+	if (WARN_ON(!mm_percpu_wq))
+		return;
+
 	mutex_lock(&lock);
 	get_online_cpus();
 	cpumask_clear(&has_work);
@@ -707,7 +696,7 @@ void lru_add_drain_all(void)
 		    pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
 		    need_activate_page_drain(cpu)) {
 			INIT_WORK(work, lru_add_drain_per_cpu);
-			queue_work_on(cpu, lru_add_drain_wq, work);
+			queue_work_on(cpu, mm_percpu_wq, work);
 			cpumask_set_cpu(cpu, &has_work);
 		}
 	}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 89f95396ec46..809025ed97ea 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1552,7 +1552,6 @@ static const struct file_operations proc_vmstat_file_operations = {
 #endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_SMP
-static struct workqueue_struct *vmstat_wq;
 static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
 int sysctl_stat_interval __read_mostly = HZ;
 
@@ -1623,7 +1622,7 @@ static void vmstat_update(struct work_struct *w)
 		 * to occur in the future. Keep on running the
 		 * update worker thread.
 		 */
-		queue_delayed_work_on(smp_processor_id(), vmstat_wq,
+		queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
 				this_cpu_ptr(&vmstat_work),
 				round_jiffies_relative(sysctl_stat_interval));
 	}
@@ -1702,7 +1701,7 @@ static void vmstat_shepherd(struct work_struct *w)
 		struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
 
 		if (!delayed_work_pending(dw) && need_update(cpu))
-			queue_delayed_work_on(cpu, vmstat_wq, dw, 0);
+			queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
 	}
 	put_online_cpus();
 
@@ -1718,7 +1717,6 @@ static void __init start_shepherd_timer(void)
 		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
 			vmstat_update);
 
-	vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
 	schedule_delayed_work(&shepherd,
 		round_jiffies_relative(sysctl_stat_interval));
 }
@@ -1764,11 +1762,16 @@ static int vmstat_cpu_dead(unsigned int cpu)
 
 #endif
 
+struct workqueue_struct *mm_percpu_wq;
+
 void __init init_mm_internals(void)
 {
-#ifdef CONFIG_SMP
-	int ret;
+	int ret __maybe_unused;
 
+	mm_percpu_wq = alloc_workqueue("mm_percpu_wq",
+				       WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+
+#ifdef CONFIG_SMP
 	ret = cpuhp_setup_state_nocalls(CPUHP_MM_VMSTAT_DEAD, "mm/vmstat:dead",
 					NULL, vmstat_cpu_dead);
 	if (ret < 0)

From 97fbfef6bd597888485b653175fb846c6998b60c Mon Sep 17 00:00:00 2001
From: Shuxiao Zhang <zhangshuxiao@xiaomi.com>
Date: Thu, 6 Apr 2017 22:30:29 +0800
Subject: [PATCH 309/410] staging: android: ashmem: lseek failed due to no
 FMODE_LSEEK.

vfs_llseek will check whether the file mode has
FMODE_LSEEK, no return failure. But ashmem can be
lseek, so add FMODE_LSEEK to ashmem file.

Comment From Greg Hackmann:
	ashmem_llseek() passes the llseek() call through to the backing
	shmem file.  91360b02ab48 ("ashmem: use vfs_llseek()") changed
	this from directly calling the file's llseek() op into a VFS
	layer call.  This also adds a check for the FMODE_LSEEK bit, so
	without that bit ashmem_llseek() now always fails with -ESPIPE.

Fixes: 91360b02ab48 ("ashmem: use vfs_llseek()")
Signed-off-by: Shuxiao Zhang <zhangshuxiao@xiaomi.com>
Tested-by: Greg Hackmann <ghackmann@google.com>
Cc: stable <stable@vger.kernel.org> # 3.18+
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/android/ashmem.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c
index 7cbad0d45b9c..6ba270e0494d 100644
--- a/drivers/staging/android/ashmem.c
+++ b/drivers/staging/android/ashmem.c
@@ -409,6 +409,7 @@ static int ashmem_mmap(struct file *file, struct vm_area_struct *vma)
 			ret = PTR_ERR(vmfile);
 			goto out;
 		}
+		vmfile->f_mode |= FMODE_LSEEK;
 		asma->file = vmfile;
 	}
 	get_file(asma->file);

From 364700cf8fd54f54ad08313464105a414e3bccb7 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Thu, 6 Apr 2017 18:31:20 +0200
Subject: [PATCH 310/410] l2tp: don't mask errors in pppol2tp_setsockopt()

pppol2tp_setsockopt() unconditionally overwrites the error value
returned by pppol2tp_tunnel_setsockopt() or
pppol2tp_session_setsockopt(), thus hiding errors from userspace.

Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ppp.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 861b255a2d51..973a9185b276 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1383,8 +1383,6 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
 	} else
 		err = pppol2tp_session_setsockopt(sk, session, optname, val);
 
-	err = 0;
-
 end_put_sess:
 	sock_put(sk);
 end:

From 321a52a39189d5e4af542f7dcdc07bba4545cf5d Mon Sep 17 00:00:00 2001
From: Guillaume Nault <g.nault@alphalink.fr>
Date: Thu, 6 Apr 2017 18:31:21 +0200
Subject: [PATCH 311/410] l2tp: don't mask errors in pppol2tp_getsockopt()

pppol2tp_getsockopt() doesn't take into account the error code returned
by pppol2tp_tunnel_getsockopt() or pppol2tp_session_getsockopt(). If
error occurs there, pppol2tp_getsockopt() continues unconditionally and
reports erroneous values.

Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ppp.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 973a9185b276..32ea0f3d868c 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1505,8 +1505,13 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
 
 		err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
 		sock_put(ps->tunnel_sock);
-	} else
+		if (err)
+			goto end_put_sess;
+	} else {
 		err = pppol2tp_session_getsockopt(sk, session, optname, &val);
+		if (err)
+			goto end_put_sess;
+	}
 
 	err = -EFAULT;
 	if (put_user(len, optlen))

From 7cc2b043bc3f1e8139e807528c8041c15924a411 Mon Sep 17 00:00:00 2001
From: Gao Feng <fgao@ikuai8.com>
Date: Thu, 6 Apr 2017 23:05:49 +0800
Subject: [PATCH 312/410] net: tcp: Increase TCP_MIB_OUTRSTS even though fail
 to alloc skb

Because TCP_MIB_OUTRSTS is an important count, so always increase it
whatever send it successfully or not.

Now move the increment of TCP_MIB_OUTRSTS to the top of
tcp_send_active_reset to make sure it is increased always even though
fail to alloc skb.

Signed-off-by: Gao Feng <fgao@ikuai8.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 22548b5f05cb..c3c082ed3879 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2999,6 +2999,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 {
 	struct sk_buff *skb;
 
+	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
+
 	/* NOTE: No TCP options attached and we never retransmit this. */
 	skb = alloc_skb(MAX_TCP_HEADER, priority);
 	if (!skb) {
@@ -3014,8 +3016,6 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	/* Send it off. */
 	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
-
-	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
 }
 
 /* Send a crossed SYN-ACK during socket establishment.

From cf903e9d3a97f89b224d2d07be37c0f160db8192 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 3 Apr 2017 15:53:34 +0200
Subject: [PATCH 313/410] Documentation: stable-kernel-rules: fix stable-tag
 format

A patch documenting how to specify which kernels a particular fix should
be backported to (seemingly) inadvertently added a minus sign after the
kernel version. This particular stable-tag format had never been used
prior to this patch, and was neither present when the patch in question
was first submitted (it was added in v2 without any comment).

Drop the minus sign to avoid any confusion.

Fixes: fdc81b7910ad ("stable_kernel_rules: Add clause about specification of kernel versions to patch.")
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/process/stable-kernel-rules.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst
index 11ec2d93a5e0..61e9c78bd6d1 100644
--- a/Documentation/process/stable-kernel-rules.rst
+++ b/Documentation/process/stable-kernel-rules.rst
@@ -124,7 +124,7 @@ specified in the following format in the sign-off area:
 
 .. code-block:: none
 
-     Cc: <stable@vger.kernel.org> # 3.3.x-
+     Cc: <stable@vger.kernel.org> # 3.3.x
 
 The tag has the meaning of:
 

From c8a139d001a1aab1ea8734db14b22dac9dd143b6 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Mon, 3 Apr 2017 11:30:34 +1000
Subject: [PATCH 314/410] sysfs: be careful of error returns from ops->show()

ops->show() can return a negative error code.
Commit 65da3484d9be ("sysfs: correctly handle short reads on PREALLOC attrs.")
(in v4.4) caused this to be stored in an unsigned 'size_t' variable, so errors
would look like large numbers.
As a result, if an error is returned, sysfs_kf_read() will return the
value of 'count', typically 4096.

Commit 17d0774f8068 ("sysfs: correctly handle read offset on PREALLOC attrs")
(in v4.8) extended this error to use the unsigned large 'len' as a size for
memmove().
Consequently, if ->show returns an error, then the first read() on the
sysfs file will return 4096 and could return uninitialized memory to
user-space.
If the application performs a subsequent read, this will trigger a memmove()
with extremely large count, and is likely to crash the machine is bizarre ways.

This bug can currently only be triggered by reading from an md
sysfs attribute declared with __ATTR_PREALLOC() during the
brief period between when mddev_put() deletes an mddev from
the ->all_mddevs list, and when mddev_delayed_delete() - which is
scheduled on a workqueue - completes.
Before this, an error won't be returned by the ->show()
After this, the ->show() won't be called.

I can reproduce it reliably only by putting delay like
	usleep_range(500000,700000);
early in mddev_delayed_delete(). Then after creating an
md device md0 run
  echo clear > /sys/block/md0/md/array_state; cat /sys/block/md0/md/array_state

The bug can be triggered without the usleep.

Fixes: 65da3484d9be ("sysfs: correctly handle short reads on PREALLOC attrs.")
Fixes: 17d0774f8068 ("sysfs: correctly handle read offset on PREALLOC attrs")
Cc: stable@vger.kernel.org
Signed-off-by: NeilBrown <neilb@suse.com>
Acked-by: Tejun Heo <tj@kernel.org>
Reported-and-tested-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/sysfs/file.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index b803213d1307..39c75a86c67f 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -108,7 +108,7 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf,
 {
 	const struct sysfs_ops *ops = sysfs_file_ops(of->kn);
 	struct kobject *kobj = of->kn->parent->priv;
-	size_t len;
+	ssize_t len;
 
 	/*
 	 * If buf != of->prealloc_buf, we don't know how
@@ -117,13 +117,15 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf,
 	if (WARN_ON_ONCE(buf != of->prealloc_buf))
 		return 0;
 	len = ops->show(kobj, of->kn->priv, buf);
+	if (len < 0)
+		return len;
 	if (pos) {
 		if (len <= pos)
 			return 0;
 		len -= pos;
 		memmove(buf, buf + pos, len);
 	}
-	return min(count, len);
+	return min_t(ssize_t, count, len);
 }
 
 /* kernfs write callback for regular sysfs files */

From 27f395b857abee5bced8356d39e1a491ff08748a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 23 Mar 2017 13:34:47 -0400
Subject: [PATCH 315/410] MAINTAINERS: separate out kernfs maintainership

Separate out kernfs from driver core and add myself as a
co-maintainer.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index c45c02bc6082..6af8df4b8680 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4117,14 +4117,13 @@ F:	drivers/block/drbd/
 F:	lib/lru_cache.c
 F:	Documentation/blockdev/drbd/
 
-DRIVER CORE, KOBJECTS, DEBUGFS, KERNFS AND SYSFS
+DRIVER CORE, KOBJECTS, DEBUGFS AND SYSFS
 M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
 S:	Supported
 F:	Documentation/kobject.txt
 F:	drivers/base/
 F:	fs/debugfs/
-F:	fs/kernfs/
 F:	fs/sysfs/
 F:	include/linux/debugfs.h
 F:	include/linux/kobj*
@@ -7202,6 +7201,14 @@ F:	arch/mips/include/uapi/asm/kvm*
 F:	arch/mips/include/asm/kvm*
 F:	arch/mips/kvm/
 
+KERNFS
+M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+M:	Tejun Heo <tj@kernel.org>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core.git
+S:	Supported
+F:	include/linux/kernfs.h
+F:	fs/kernfs/
+
 KEXEC
 M:	Eric Biederman <ebiederm@xmission.com>
 W:	http://kernel.org/pub/linux/utils/kernel/kexec/

From 425fffd886bae3d127a08fa6a17f2e31e24ed7ff Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Fri, 7 Apr 2017 23:51:07 +0800
Subject: [PATCH 316/410] sysctl: report EINVAL if value is larger than
 UINT_MAX for proc_douintvec

Currently, inputting the following command will succeed but actually the
value will be truncated:

  # echo 0x12ffffffff > /proc/sys/net/ipv4/tcp_notsent_lowat

This is not friendly to the user, so instead, we should report error
when the value is larger than UINT_MAX.

Fixes: e7d316a02f68 ("sysctl: handle error writing UINT_MAX to u32 fields")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Cc: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sysctl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8cca4c7bb21f..8c8714fcb53c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2133,6 +2133,8 @@ static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
 	if (write) {
 		if (*negp)
 			return -EINVAL;
+		if (*lvalp > UINT_MAX)
+			return -EINVAL;
 		*valp = *lvalp;
 	} else {
 		unsigned int val = *valp;

From cf01fb9985e8deb25ccf0ea54d916b8871ae0e62 Mon Sep 17 00:00:00 2001
From: Chris Salls <salls@cs.ucsb.edu>
Date: Fri, 7 Apr 2017 23:48:11 -0700
Subject: [PATCH 317/410] mm/mempolicy.c: fix error handling in set_mempolicy
 and mbind.

In the case that compat_get_bitmap fails we do not want to copy the
bitmap to the user as it will contain uninitialized stack data and leak
sensitive data.

Signed-off-by: Chris Salls <salls@cs.ucsb.edu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/mempolicy.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 75b2745bac41..37d0b334bfe9 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1529,7 +1529,6 @@ COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
 COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask,
 		       compat_ulong_t, maxnode)
 {
-	long err = 0;
 	unsigned long __user *nm = NULL;
 	unsigned long nr_bits, alloc_size;
 	DECLARE_BITMAP(bm, MAX_NUMNODES);
@@ -1538,14 +1537,13 @@ COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask,
 	alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
 
 	if (nmask) {
-		err = compat_get_bitmap(bm, nmask, nr_bits);
+		if (compat_get_bitmap(bm, nmask, nr_bits))
+			return -EFAULT;
 		nm = compat_alloc_user_space(alloc_size);
-		err |= copy_to_user(nm, bm, alloc_size);
+		if (copy_to_user(nm, bm, alloc_size))
+			return -EFAULT;
 	}
 
-	if (err)
-		return -EFAULT;
-
 	return sys_set_mempolicy(mode, nm, nr_bits+1);
 }
 
@@ -1553,7 +1551,6 @@ COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len,
 		       compat_ulong_t, mode, compat_ulong_t __user *, nmask,
 		       compat_ulong_t, maxnode, compat_ulong_t, flags)
 {
-	long err = 0;
 	unsigned long __user *nm = NULL;
 	unsigned long nr_bits, alloc_size;
 	nodemask_t bm;
@@ -1562,14 +1559,13 @@ COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len,
 	alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
 
 	if (nmask) {
-		err = compat_get_bitmap(nodes_addr(bm), nmask, nr_bits);
+		if (compat_get_bitmap(nodes_addr(bm), nmask, nr_bits))
+			return -EFAULT;
 		nm = compat_alloc_user_space(alloc_size);
-		err |= copy_to_user(nm, nodes_addr(bm), alloc_size);
+		if (copy_to_user(nm, nodes_addr(bm), alloc_size))
+			return -EFAULT;
 	}
 
-	if (err)
-		return -EFAULT;
-
 	return sys_mbind(start, len, mode, nm, nr_bits+1, flags);
 }
 

From 2638fd0f92d4397884fd991d8f4925cb3f081901 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 3 Apr 2017 10:55:11 -0700
Subject: [PATCH 318/410] netfilter: xt_TCPMSS: add more sanity tests on
 tcph->doff

Denys provided an awesome KASAN report pointing to an use
after free in xt_TCPMSS

I have provided three patches to fix this issue, either in xt_TCPMSS or
in xt_tcpudp.c. It seems xt_TCPMSS patch has the smallest possible
impact.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Denys Fedoryshchenko <nuclearcat@nuclearcat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_TCPMSS.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 27241a767f17..c64aca611ac5 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -104,7 +104,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 	tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
 	tcp_hdrlen = tcph->doff * 4;
 
-	if (len < tcp_hdrlen)
+	if (len < tcp_hdrlen || tcp_hdrlen < sizeof(struct tcphdr))
 		return -1;
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
@@ -152,6 +152,10 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 	if (len > tcp_hdrlen)
 		return 0;
 
+	/* tcph->doff has 4 bits, do not wrap it to 0 */
+	if (tcp_hdrlen >= 15 * 4)
+		return 0;
+
 	/*
 	 * MSS Option not found ?! add it..
 	 */

From 97aae0df1de4d7dd80905fb067e28b032a132995 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sat, 1 Apr 2017 20:31:32 +0800
Subject: [PATCH 319/410] netfilter: ctnetlink: using bit to represent the ct
 event

Otherwise, creating a new conntrack via nfnetlink:
  # conntrack -I -p udp -s 1.1.1.1 -d 2.2.2.2 -t 10 --sport 10 --dport 20

will emit the wrong ct events(where UPDATE should be NEW):
  # conntrack -E
  [UPDATE] udp      17 10 src=1.1.1.1 dst=2.2.2.2 sport=10 dport=20
  [UNREPLIED] src=2.2.2.2 dst=1.1.1.1 sport=20 dport=10 mark=0

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 908d858034e4..59ee27deb9a0 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1929,9 +1929,9 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl,
 
 			err = 0;
 			if (test_bit(IPS_EXPECTED_BIT, &ct->status))
-				events = IPCT_RELATED;
+				events = 1 << IPCT_RELATED;
 			else
-				events = IPCT_NEW;
+				events = 1 << IPCT_NEW;
 
 			if (cda[CTA_LABELS] &&
 			    ctnetlink_attach_labels(ct, cda) == 0)

From 8b5995d0633b04f9a0d321a7cc77e386440730cf Mon Sep 17 00:00:00 2001
From: Gao Feng <fgao@ikuai8.com>
Date: Wed, 29 Mar 2017 19:11:27 +0800
Subject: [PATCH 320/410] netfilter: helper: Add the rcu lock when call
 __nf_conntrack_helper_find

When invoke __nf_conntrack_helper_find, it needs the rcu lock to
protect the helper module which would not be unloaded.

Now there are two caller nf_conntrack_helper_try_module_get and
ctnetlink_create_expect which don't hold rcu lock. And the other
callers left like ctnetlink_change_helper, ctnetlink_create_conntrack,
and ctnetlink_glue_attach_expect, they already hold the rcu lock
or spin_lock_bh.

Remove the rcu lock in functions nf_ct_helper_expectfn_find_by_name
and nf_ct_helper_expectfn_find_by_symbol. Because they return one pointer
which needs rcu lock, so their caller should hold the rcu lock, not in
these two functions.

Signed-off-by: Gao Feng <fgao@ikuai8.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_helper.c  | 17 ++++++++++++-----
 net/netfilter/nf_conntrack_netlink.c | 10 ++++++++--
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 6dc44d9b4190..4eeb3418366a 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -158,16 +158,25 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum)
 {
 	struct nf_conntrack_helper *h;
 
+	rcu_read_lock();
+
 	h = __nf_conntrack_helper_find(name, l3num, protonum);
 #ifdef CONFIG_MODULES
 	if (h == NULL) {
-		if (request_module("nfct-helper-%s", name) == 0)
+		rcu_read_unlock();
+		if (request_module("nfct-helper-%s", name) == 0) {
+			rcu_read_lock();
 			h = __nf_conntrack_helper_find(name, l3num, protonum);
+		} else {
+			return h;
+		}
 	}
 #endif
 	if (h != NULL && !try_module_get(h->me))
 		h = NULL;
 
+	rcu_read_unlock();
+
 	return h;
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get);
@@ -311,38 +320,36 @@ void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n)
 }
 EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister);
 
+/* Caller should hold the rcu lock */
 struct nf_ct_helper_expectfn *
 nf_ct_helper_expectfn_find_by_name(const char *name)
 {
 	struct nf_ct_helper_expectfn *cur;
 	bool found = false;
 
-	rcu_read_lock();
 	list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) {
 		if (!strcmp(cur->name, name)) {
 			found = true;
 			break;
 		}
 	}
-	rcu_read_unlock();
 	return found ? cur : NULL;
 }
 EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_name);
 
+/* Caller should hold the rcu lock */
 struct nf_ct_helper_expectfn *
 nf_ct_helper_expectfn_find_by_symbol(const void *symbol)
 {
 	struct nf_ct_helper_expectfn *cur;
 	bool found = false;
 
-	rcu_read_lock();
 	list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) {
 		if (cur->expectfn == symbol) {
 			found = true;
 			break;
 		}
 	}
-	rcu_read_unlock();
 	return found ? cur : NULL;
 }
 EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_symbol);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 59ee27deb9a0..06d28ac663df 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -3133,23 +3133,27 @@ ctnetlink_create_expect(struct net *net,
 		return -ENOENT;
 	ct = nf_ct_tuplehash_to_ctrack(h);
 
+	rcu_read_lock();
 	if (cda[CTA_EXPECT_HELP_NAME]) {
 		const char *helpname = nla_data(cda[CTA_EXPECT_HELP_NAME]);
 
 		helper = __nf_conntrack_helper_find(helpname, u3,
 						    nf_ct_protonum(ct));
 		if (helper == NULL) {
+			rcu_read_unlock();
 #ifdef CONFIG_MODULES
 			if (request_module("nfct-helper-%s", helpname) < 0) {
 				err = -EOPNOTSUPP;
 				goto err_ct;
 			}
+			rcu_read_lock();
 			helper = __nf_conntrack_helper_find(helpname, u3,
 							    nf_ct_protonum(ct));
 			if (helper) {
 				err = -EAGAIN;
-				goto err_ct;
+				goto err_rcu;
 			}
+			rcu_read_unlock();
 #endif
 			err = -EOPNOTSUPP;
 			goto err_ct;
@@ -3159,11 +3163,13 @@ ctnetlink_create_expect(struct net *net,
 	exp = ctnetlink_alloc_expect(cda, ct, helper, &tuple, &mask);
 	if (IS_ERR(exp)) {
 		err = PTR_ERR(exp);
-		goto err_ct;
+		goto err_rcu;
 	}
 
 	err = nf_ct_expect_related_report(exp, portid, report);
 	nf_ct_expect_put(exp);
+err_rcu:
+	rcu_read_unlock();
 err_ct:
 	nf_ct_put(ct);
 	return err;

From 3173d5b8c89e67fa3176292ff9af06f09f365348 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sat, 1 Apr 2017 20:55:44 +0800
Subject: [PATCH 321/410] netfilter: ctnetlink: make it safer when checking the
 ct helper name

One CPU is doing ctnetlink_change_helper(), while another CPU is doing
unhelp() at the same time. So even if help->helper is not NULL at first,
the later statement strcmp(help->helper->name, ...) may still access
the NULL pointer.

So we must use rcu_read_lock and rcu_dereference to avoid such _bad_
thing happen.

Fixes: f95d7a46bc57 ("netfilter: ctnetlink: Fix regression in CTA_HELP processing")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 06d28ac663df..f9c643bc1a8e 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1488,11 +1488,16 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
 		 * treat the second attempt as a no-op instead of returning
 		 * an error.
 		 */
-		if (help && help->helper &&
-		    !strcmp(help->helper->name, helpname))
-			return 0;
-		else
-			return -EBUSY;
+		err = -EBUSY;
+		if (help) {
+			rcu_read_lock();
+			helper = rcu_dereference(help->helper);
+			if (helper && !strcmp(helper->name, helpname))
+				err = 0;
+			rcu_read_unlock();
+		}
+
+		return err;
 	}
 
 	if (!strcmp(helpname, "")) {

From 0c7930e5763bdd189bd50035c025a9cbe5e82f23 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sun, 2 Apr 2017 17:27:53 +0800
Subject: [PATCH 322/410] netfilter: make it safer during the
 inet6_dev->addr_list traversal

inet6_dev->addr_list is protected by inet6_dev->lock, so only using
rcu_read_lock is not enough, we should acquire read_lock_bh(&idev->lock)
before the inet6_dev->addr_list traversal.

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_nat_redirect.c | 2 ++
 net/netfilter/xt_TPROXY.c       | 5 ++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index d43869879fcf..86067560a318 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -101,11 +101,13 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
 		rcu_read_lock();
 		idev = __in6_dev_get(skb->dev);
 		if (idev != NULL) {
+			read_lock_bh(&idev->lock);
 			list_for_each_entry(ifa, &idev->addr_list, if_list) {
 				newdst = ifa->addr;
 				addr = true;
 				break;
 			}
+			read_unlock_bh(&idev->lock);
 		}
 		rcu_read_unlock();
 
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 80cb7babeb64..df7f1df00330 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -393,7 +393,8 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
 
 	rcu_read_lock();
 	indev = __in6_dev_get(skb->dev);
-	if (indev)
+	if (indev) {
+		read_lock_bh(&indev->lock);
 		list_for_each_entry(ifa, &indev->addr_list, if_list) {
 			if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED))
 				continue;
@@ -401,6 +402,8 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
 			laddr = &ifa->addr;
 			break;
 		}
+		read_unlock_bh(&indev->lock);
+	}
 	rcu_read_unlock();
 
 	return laddr ? laddr : daddr;

From 207df81501021f6d1a935cebf8e1f34d6d25564b Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sun, 2 Apr 2017 18:01:33 +0800
Subject: [PATCH 323/410] netfilter: ctnetlink: skip dumping expect when
 nfct_help(ct) is NULL

For IPCTNL_MSG_EXP_GET, if the CTA_EXPECT_MASTER attr is specified, then
the NLM_F_DUMP request will dump the expectations related to this
connection tracking.

But we forget to check whether the conntrack has nf_conn_help or not,
so if nfct_help(ct) is NULL, oops will happen:

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000008
 IP: ctnetlink_exp_ct_dump_table+0xf9/0x1e0 [nf_conntrack_netlink]
 Call Trace:
  ? ctnetlink_exp_ct_dump_table+0x75/0x1e0 [nf_conntrack_netlink]
  netlink_dump+0x124/0x2a0
  __netlink_dump_start+0x161/0x190
  ctnetlink_dump_exp_ct+0x16c/0x1bc [nf_conntrack_netlink]
  ? ctnetlink_exp_fill_info.constprop.33+0xf0/0xf0 [nf_conntrack_netlink]
  ? ctnetlink_glue_seqadj+0x20/0x20 [nf_conntrack_netlink]
  ctnetlink_get_expect+0x32e/0x370 [nf_conntrack_netlink]
  ? debug_lockdep_rcu_enabled+0x1d/0x20
  nfnetlink_rcv_msg+0x60a/0x6a9 [nfnetlink]
  ? nfnetlink_rcv_msg+0x1b9/0x6a9 [nfnetlink]
  [...]

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_netlink.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index f9c643bc1a8e..f78eadba343d 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2794,6 +2794,12 @@ static int ctnetlink_dump_exp_ct(struct net *net, struct sock *ctnl,
 		return -ENOENT;
 
 	ct = nf_ct_tuplehash_to_ctrack(h);
+	/* No expectation linked to this connection tracking. */
+	if (!nfct_help(ct)) {
+		nf_ct_put(ct);
+		return 0;
+	}
+
 	c.data = ct;
 
 	err = netlink_dump_start(ctnl, skb, nlh, &c);

From 7cddd967bfc2e4fc6b3218c2ddc67fbeed433ad3 Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Sun, 2 Apr 2017 18:25:37 +0800
Subject: [PATCH 324/410] netfilter: nf_ct_expect: use proper RCU list
 traversal/update APIs

We should use proper RCU list APIs to manipulate help->expectations,
as we can dump the conntrack's expectations via nfnetlink, i.e. in
ctnetlink_exp_ct_dump_table(), where only rcu_read_lock is acquired.

So for list traversal, use hlist_for_each_entry_rcu; for list add/del,
use hlist_add_head_rcu and hlist_del_rcu.

Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_expect.c  | 4 ++--
 net/netfilter/nf_conntrack_netlink.c | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 4b2e1fb28bb4..d80073037856 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -57,7 +57,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
 	hlist_del_rcu(&exp->hnode);
 	net->ct.expect_count--;
 
-	hlist_del(&exp->lnode);
+	hlist_del_rcu(&exp->lnode);
 	master_help->expecting[exp->class]--;
 
 	nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report);
@@ -363,7 +363,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 	/* two references : one for hash insert, one for the timer */
 	atomic_add(2, &exp->use);
 
-	hlist_add_head(&exp->lnode, &master_help->expectations);
+	hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
 	master_help->expecting[exp->class]++;
 
 	hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index f78eadba343d..dc7dfd68fafe 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2680,8 +2680,8 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	last = (struct nf_conntrack_expect *)cb->args[1];
 	for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) {
 restart:
-		hlist_for_each_entry(exp, &nf_ct_expect_hash[cb->args[0]],
-				     hnode) {
+		hlist_for_each_entry_rcu(exp, &nf_ct_expect_hash[cb->args[0]],
+					 hnode) {
 			if (l3proto && exp->tuple.src.l3num != l3proto)
 				continue;
 
@@ -2732,7 +2732,7 @@ ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 	rcu_read_lock();
 	last = (struct nf_conntrack_expect *)cb->args[1];
 restart:
-	hlist_for_each_entry(exp, &help->expectations, lnode) {
+	hlist_for_each_entry_rcu(exp, &help->expectations, lnode) {
 		if (l3proto && exp->tuple.src.l3num != l3proto)
 			continue;
 		if (cb->args[1]) {

From 39da7c509acff13fc8cb12ec1bb20337c988ed36 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 9 Apr 2017 09:49:44 -0700
Subject: [PATCH 325/410] Linux 4.11-rc6

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 7acbcb324bae..efa267a92ba6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 11
 SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc6
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*

From 096e9e912ba8c973213e57701e0591e01064be26 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 6 Apr 2017 09:15:50 +0300
Subject: [PATCH 326/410] nvme-loop: Fix sqsize wrong assignment based on ctrl
 MQES capability

both our sqsize and the controller MQES cap are a 0 based value,
so making it 1 based is wrong.

Reported-by: Trapp, Darren <Darren.Trapp@cavium.com>
Reported-by: Daniel Verkamp <daniel.verkamp@intel.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/target/loop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 22f7bc6bac7f..c7b0b6a52708 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -392,7 +392,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	}
 
 	ctrl->ctrl.sqsize =
-		min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize);
+		min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
 
 	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
 	if (error)

From 1af76ddaa80e3bca8001b535e44aaaebd3e1907e Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 6 Apr 2017 09:15:51 +0300
Subject: [PATCH 327/410] nvme-rdma: Fix sqsize wrong assignment based on ctrl
 MQES capability

both our sqsize and the controller MQES cap are a 0 based value,
so making it 1 based is wrong.

Reported-by: Trapp, Darren <Darren.Trapp@cavium.com>
Reported-by: Daniel Verkamp <daniel.verkamp@intel.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/rdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 47a479f26e5d..16f84eb0b95e 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1606,7 +1606,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
 	}
 
 	ctrl->ctrl.sqsize =
-		min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize);
+		min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
 
 	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
 	if (error)

From c6c64a942c87faa6fca68f5dd208094b8cf61236 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 6 Apr 2017 09:15:52 +0300
Subject: [PATCH 328/410] nvme-fc: Fix sqsize wrong assignment based on ctrl
 MQES capability

both our sqsize and the controller MQES cap are a 0 based value,
so making it 1 based is wrong.

Reported-by: Trapp, Darren <Darren.Trapp@cavium.com>
Reported-by: Daniel Verkamp <daniel.verkamp@intel.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/fc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 9690beb15e69..d996ca73d3be 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2023,7 +2023,7 @@ nvme_fc_configure_admin_queue(struct nvme_fc_ctrl *ctrl)
 	}
 
 	ctrl->ctrl.sqsize =
-		min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize);
+		min_t(int, NVME_CAP_MQES(ctrl->cap), ctrl->ctrl.sqsize);
 
 	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
 	if (error)

From 17c3060b1701fc69daedb4c90be6325d3d9fca8e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 8 Apr 2017 08:07:33 -0700
Subject: [PATCH 329/410] tcp: clear saved_syn in tcp_disconnect()

In the (very unlikely) case a passive socket becomes a listener,
we do not want to duplicate its saved SYN headers.

This would lead to double frees, use after free, and please hackers and
various fuzzers

Tested:
    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
   +0 setsockopt(3, IPPROTO_TCP, TCP_SAVE_SYN, [1], 4) = 0
   +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0

   +0 bind(3, ..., ...) = 0
   +0 listen(3, 5) = 0

   +0 < S 0:0(0) win 32972 <mss 1460,nop,wscale 7>
   +0 > S. 0:0(0) ack 1 <...>
  +.1 < . 1:1(0) ack 1 win 257
   +0 accept(3, ..., ...) = 4

   +0 connect(4, AF_UNSPEC, ...) = 0
   +0 close(3) = 0
   +0 bind(4, ..., ...) = 0
   +0 listen(4, 5) = 0

   +0 < S 0:0(0) win 32972 <mss 1460,nop,wscale 7>
   +0 > S. 0:0(0) ack 1 <...>
  +.1 < . 1:1(0) ack 1 win 257

Fixes: cd8ae85299d5 ("tcp: provide SYN headers for passive connections")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1e319a525d51..40ba4249a586 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2322,6 +2322,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tcp_init_send_head(sk);
 	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
 	__sk_dst_reset(sk);
+	tcp_saved_syn_free(tp);
 
 	/* Clean up fastopen related fields */
 	tcp_free_fastopen_req(tp);

From dbc9d69edfa0fc3b44156f32747a5add3fbdfb8b Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Wed, 22 Mar 2017 17:03:31 +0900
Subject: [PATCH 330/410] pinctrl: samsung: Add missing part for
 PINCFG_TYPE_DRV of Exynos5433

The commit 1259feddd0f8("pinctrl: samsung: Fix the width of
PINCFG_TYPE_DRV bitfields for Exynos5433") already fixed
the different width of PINCFG_TYPE_DRV from previous Exynos SoC.

However wrong merge conflict resolution was chosen in commit
7f36f5d11cda ("Merge tag 'v4.10-rc6' into devel") effectively dropping
the changes for PINCFG_TYPE_DRV.  Re-do them here.

The macro EXYNOS_PIN_BANK_EINTW is no longer used so remove it.

Fixes: 7f36f5d11cda ("Merge tag 'v4.10-rc6' into devel")
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/samsung/pinctrl-exynos.c | 80 ++++++++++++------------
 drivers/pinctrl/samsung/pinctrl-exynos.h | 11 ----
 2 files changed, 40 insertions(+), 51 deletions(-)

diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c
index f9b49967f512..63e51b56a22a 100644
--- a/drivers/pinctrl/samsung/pinctrl-exynos.c
+++ b/drivers/pinctrl/samsung/pinctrl-exynos.c
@@ -1468,82 +1468,82 @@ const struct samsung_pin_ctrl exynos5420_pin_ctrl[] __initconst = {
 
 /* pin banks of exynos5433 pin-controller - ALIVE */
 static const struct samsung_pin_bank_data exynos5433_pin_banks0[] __initconst = {
-	EXYNOS_PIN_BANK_EINTW(8, 0x000, "gpa0", 0x00),
-	EXYNOS_PIN_BANK_EINTW(8, 0x020, "gpa1", 0x04),
-	EXYNOS_PIN_BANK_EINTW(8, 0x040, "gpa2", 0x08),
-	EXYNOS_PIN_BANK_EINTW(8, 0x060, "gpa3", 0x0c),
-	EXYNOS_PIN_BANK_EINTW_EXT(8, 0x020, "gpf1", 0x1004, 1),
-	EXYNOS_PIN_BANK_EINTW_EXT(4, 0x040, "gpf2", 0x1008, 1),
-	EXYNOS_PIN_BANK_EINTW_EXT(4, 0x060, "gpf3", 0x100c, 1),
-	EXYNOS_PIN_BANK_EINTW_EXT(8, 0x080, "gpf4", 0x1010, 1),
-	EXYNOS_PIN_BANK_EINTW_EXT(8, 0x0a0, "gpf5", 0x1014, 1),
+	EXYNOS5433_PIN_BANK_EINTW(8, 0x000, "gpa0", 0x00),
+	EXYNOS5433_PIN_BANK_EINTW(8, 0x020, "gpa1", 0x04),
+	EXYNOS5433_PIN_BANK_EINTW(8, 0x040, "gpa2", 0x08),
+	EXYNOS5433_PIN_BANK_EINTW(8, 0x060, "gpa3", 0x0c),
+	EXYNOS5433_PIN_BANK_EINTW_EXT(8, 0x020, "gpf1", 0x1004, 1),
+	EXYNOS5433_PIN_BANK_EINTW_EXT(4, 0x040, "gpf2", 0x1008, 1),
+	EXYNOS5433_PIN_BANK_EINTW_EXT(4, 0x060, "gpf3", 0x100c, 1),
+	EXYNOS5433_PIN_BANK_EINTW_EXT(8, 0x080, "gpf4", 0x1010, 1),
+	EXYNOS5433_PIN_BANK_EINTW_EXT(8, 0x0a0, "gpf5", 0x1014, 1),
 };
 
 /* pin banks of exynos5433 pin-controller - AUD */
 static const struct samsung_pin_bank_data exynos5433_pin_banks1[] __initconst = {
-	EXYNOS_PIN_BANK_EINTG(7, 0x000, "gpz0", 0x00),
-	EXYNOS_PIN_BANK_EINTG(4, 0x020, "gpz1", 0x04),
+	EXYNOS5433_PIN_BANK_EINTG(7, 0x000, "gpz0", 0x00),
+	EXYNOS5433_PIN_BANK_EINTG(4, 0x020, "gpz1", 0x04),
 };
 
 /* pin banks of exynos5433 pin-controller - CPIF */
 static const struct samsung_pin_bank_data exynos5433_pin_banks2[] __initconst = {
-	EXYNOS_PIN_BANK_EINTG(2, 0x000, "gpv6", 0x00),
+	EXYNOS5433_PIN_BANK_EINTG(2, 0x000, "gpv6", 0x00),
 };
 
 /* pin banks of exynos5433 pin-controller - eSE */
 static const struct samsung_pin_bank_data exynos5433_pin_banks3[] __initconst = {
-	EXYNOS_PIN_BANK_EINTG(3, 0x000, "gpj2", 0x00),
+	EXYNOS5433_PIN_BANK_EINTG(3, 0x000, "gpj2", 0x00),
 };
 
 /* pin banks of exynos5433 pin-controller - FINGER */
 static const struct samsung_pin_bank_data exynos5433_pin_banks4[] __initconst = {
-	EXYNOS_PIN_BANK_EINTG(4, 0x000, "gpd5", 0x00),
+	EXYNOS5433_PIN_BANK_EINTG(4, 0x000, "gpd5", 0x00),
 };
 
 /* pin banks of exynos5433 pin-controller - FSYS */
 static const struct samsung_pin_bank_data exynos5433_pin_banks5[] __initconst = {
-	EXYNOS_PIN_BANK_EINTG(6, 0x000, "gph1", 0x00),
-	EXYNOS_PIN_BANK_EINTG(7, 0x020, "gpr4", 0x04),
-	EXYNOS_PIN_BANK_EINTG(5, 0x040, "gpr0", 0x08),
-	EXYNOS_PIN_BANK_EINTG(8, 0x060, "gpr1", 0x0c),
-	EXYNOS_PIN_BANK_EINTG(2, 0x080, "gpr2", 0x10),
-	EXYNOS_PIN_BANK_EINTG(8, 0x0a0, "gpr3", 0x14),
+	EXYNOS5433_PIN_BANK_EINTG(6, 0x000, "gph1", 0x00),
+	EXYNOS5433_PIN_BANK_EINTG(7, 0x020, "gpr4", 0x04),
+	EXYNOS5433_PIN_BANK_EINTG(5, 0x040, "gpr0", 0x08),
+	EXYNOS5433_PIN_BANK_EINTG(8, 0x060, "gpr1", 0x0c),
+	EXYNOS5433_PIN_BANK_EINTG(2, 0x080, "gpr2", 0x10),
+	EXYNOS5433_PIN_BANK_EINTG(8, 0x0a0, "gpr3", 0x14),
 };
 
 /* pin banks of exynos5433 pin-controller - IMEM */
 static const struct samsung_pin_bank_data exynos5433_pin_banks6[] __initconst = {
-	EXYNOS_PIN_BANK_EINTG(8, 0x000, "gpf0", 0x00),
+	EXYNOS5433_PIN_BANK_EINTG(8, 0x000, "gpf0", 0x00),
 };
 
 /* pin banks of exynos5433 pin-controller - NFC */
 static const struct samsung_pin_bank_data exynos5433_pin_banks7[] __initconst = {
-	EXYNOS_PIN_BANK_EINTG(3, 0x000, "gpj0", 0x00),
+	EXYNOS5433_PIN_BANK_EINTG(3, 0x000, "gpj0", 0x00),
 };
 
 /* pin banks of exynos5433 pin-controller - PERIC */
 static const struct samsung_pin_bank_data exynos5433_pin_banks8[] __initconst = {
-	EXYNOS_PIN_BANK_EINTG(6, 0x000, "gpv7", 0x00),
-	EXYNOS_PIN_BANK_EINTG(5, 0x020, "gpb0", 0x04),
-	EXYNOS_PIN_BANK_EINTG(8, 0x040, "gpc0", 0x08),
-	EXYNOS_PIN_BANK_EINTG(2, 0x060, "gpc1", 0x0c),
-	EXYNOS_PIN_BANK_EINTG(6, 0x080, "gpc2", 0x10),
-	EXYNOS_PIN_BANK_EINTG(8, 0x0a0, "gpc3", 0x14),
-	EXYNOS_PIN_BANK_EINTG(2, 0x0c0, "gpg0", 0x18),
-	EXYNOS_PIN_BANK_EINTG(4, 0x0e0, "gpd0", 0x1c),
-	EXYNOS_PIN_BANK_EINTG(6, 0x100, "gpd1", 0x20),
-	EXYNOS_PIN_BANK_EINTG(8, 0x120, "gpd2", 0x24),
-	EXYNOS_PIN_BANK_EINTG(5, 0x140, "gpd4", 0x28),
-	EXYNOS_PIN_BANK_EINTG(2, 0x160, "gpd8", 0x2c),
-	EXYNOS_PIN_BANK_EINTG(7, 0x180, "gpd6", 0x30),
-	EXYNOS_PIN_BANK_EINTG(3, 0x1a0, "gpd7", 0x34),
-	EXYNOS_PIN_BANK_EINTG(5, 0x1c0, "gpg1", 0x38),
-	EXYNOS_PIN_BANK_EINTG(2, 0x1e0, "gpg2", 0x3c),
-	EXYNOS_PIN_BANK_EINTG(8, 0x200, "gpg3", 0x40),
+	EXYNOS5433_PIN_BANK_EINTG(6, 0x000, "gpv7", 0x00),
+	EXYNOS5433_PIN_BANK_EINTG(5, 0x020, "gpb0", 0x04),
+	EXYNOS5433_PIN_BANK_EINTG(8, 0x040, "gpc0", 0x08),
+	EXYNOS5433_PIN_BANK_EINTG(2, 0x060, "gpc1", 0x0c),
+	EXYNOS5433_PIN_BANK_EINTG(6, 0x080, "gpc2", 0x10),
+	EXYNOS5433_PIN_BANK_EINTG(8, 0x0a0, "gpc3", 0x14),
+	EXYNOS5433_PIN_BANK_EINTG(2, 0x0c0, "gpg0", 0x18),
+	EXYNOS5433_PIN_BANK_EINTG(4, 0x0e0, "gpd0", 0x1c),
+	EXYNOS5433_PIN_BANK_EINTG(6, 0x100, "gpd1", 0x20),
+	EXYNOS5433_PIN_BANK_EINTG(8, 0x120, "gpd2", 0x24),
+	EXYNOS5433_PIN_BANK_EINTG(5, 0x140, "gpd4", 0x28),
+	EXYNOS5433_PIN_BANK_EINTG(2, 0x160, "gpd8", 0x2c),
+	EXYNOS5433_PIN_BANK_EINTG(7, 0x180, "gpd6", 0x30),
+	EXYNOS5433_PIN_BANK_EINTG(3, 0x1a0, "gpd7", 0x34),
+	EXYNOS5433_PIN_BANK_EINTG(5, 0x1c0, "gpg1", 0x38),
+	EXYNOS5433_PIN_BANK_EINTG(2, 0x1e0, "gpg2", 0x3c),
+	EXYNOS5433_PIN_BANK_EINTG(8, 0x200, "gpg3", 0x40),
 };
 
 /* pin banks of exynos5433 pin-controller - TOUCH */
 static const struct samsung_pin_bank_data exynos5433_pin_banks9[] __initconst = {
-	EXYNOS_PIN_BANK_EINTG(3, 0x000, "gpj1", 0x00),
+	EXYNOS5433_PIN_BANK_EINTG(3, 0x000, "gpj1", 0x00),
 };
 
 /*
diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.h b/drivers/pinctrl/samsung/pinctrl-exynos.h
index a473092fb8d2..cd046eb7d705 100644
--- a/drivers/pinctrl/samsung/pinctrl-exynos.h
+++ b/drivers/pinctrl/samsung/pinctrl-exynos.h
@@ -79,17 +79,6 @@
 		.name		= id			\
 	}
 
-#define EXYNOS_PIN_BANK_EINTW_EXT(pins, reg, id, offs, pctl_idx) \
-	{						\
-		.type           = &bank_type_alive,	\
-		.pctl_offset    = reg,                  \
-		.nr_pins        = pins,                 \
-		.eint_type      = EINT_TYPE_WKUP,       \
-		.eint_offset    = offs,                 \
-		.name           = id,                   \
-		.pctl_res_idx   = pctl_idx,             \
-	}						\
-
 #define EXYNOS5433_PIN_BANK_EINTG(pins, reg, id, offs)		\
 	{							\
 		.type		= &exynos5433_bank_type_off,	\

From 264d509637d95f9404e52ced5003ad352e0f6a26 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Mon, 10 Apr 2017 11:16:59 -0400
Subject: [PATCH 331/410] audit: make sure we don't let the retry queue grow
 without bounds

The retry queue is intended to provide a temporary buffer in the case
of transient errors when communicating with auditd, it is not meant
as a long life queue, that functionality is provided by the hold
queue.

This patch fixes a problem identified by Seth where the retry queue
could grow uncontrollably if an auditd instance did not connect to
the kernel to drain the queues.  This commit fixes this by doing the
following:

* Make sure we always call auditd_reset() if we decide the connection
with audit is really dead.  There were some cases in
kauditd_hold_skb() where we did not reset the connection, this patch
relocates the reset calls to kauditd_thread() so all the error
conditions are caught and the connection reset.  As a side effect,
this means we could move auditd_reset() and get rid of the forward
definition at the top of kernel/audit.c.

* We never checked the status of the auditd connection when
processing the main audit queue which meant that the retry queue
could grow unchecked.  This patch adds a call to auditd_reset()
after the main queue has been processed if auditd is not connected,
the auditd_reset() call will make sure the retry and hold queues are
correctly managed/flushed so that the retry queue remains reasonable.

Cc: <stable@vger.kernel.org> # 4.10.x-: 5b52330bbfe6
Reported-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 kernel/audit.c | 67 ++++++++++++++++++++++++--------------------------
 1 file changed, 32 insertions(+), 35 deletions(-)

diff --git a/kernel/audit.c b/kernel/audit.c
index 2f4964cfde0b..a871bf80fde1 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -160,7 +160,6 @@ static LIST_HEAD(audit_freelist);
 
 /* queue msgs to send via kauditd_task */
 static struct sk_buff_head audit_queue;
-static void kauditd_hold_skb(struct sk_buff *skb);
 /* queue msgs due to temporary unicast send problems */
 static struct sk_buff_head audit_retry_queue;
 /* queue msgs waiting for new auditd connection */
@@ -453,30 +452,6 @@ static void auditd_set(int pid, u32 portid, struct net *net)
 	spin_unlock_irqrestore(&auditd_conn.lock, flags);
 }
 
-/**
- * auditd_reset - Disconnect the auditd connection
- *
- * Description:
- * Break the auditd/kauditd connection and move all the queued records into the
- * hold queue in case auditd reconnects.
- */
-static void auditd_reset(void)
-{
-	struct sk_buff *skb;
-
-	/* if it isn't already broken, break the connection */
-	rcu_read_lock();
-	if (auditd_conn.pid)
-		auditd_set(0, 0, NULL);
-	rcu_read_unlock();
-
-	/* flush all of the main and retry queues to the hold queue */
-	while ((skb = skb_dequeue(&audit_retry_queue)))
-		kauditd_hold_skb(skb);
-	while ((skb = skb_dequeue(&audit_queue)))
-		kauditd_hold_skb(skb);
-}
-
 /**
  * kauditd_print_skb - Print the audit record to the ring buffer
  * @skb: audit record
@@ -505,9 +480,6 @@ static void kauditd_rehold_skb(struct sk_buff *skb)
 {
 	/* put the record back in the queue at the same place */
 	skb_queue_head(&audit_hold_queue, skb);
-
-	/* fail the auditd connection */
-	auditd_reset();
 }
 
 /**
@@ -544,9 +516,6 @@ static void kauditd_hold_skb(struct sk_buff *skb)
 	/* we have no other options - drop the message */
 	audit_log_lost("kauditd hold queue overflow");
 	kfree_skb(skb);
-
-	/* fail the auditd connection */
-	auditd_reset();
 }
 
 /**
@@ -566,6 +535,30 @@ static void kauditd_retry_skb(struct sk_buff *skb)
 	skb_queue_tail(&audit_retry_queue, skb);
 }
 
+/**
+ * auditd_reset - Disconnect the auditd connection
+ *
+ * Description:
+ * Break the auditd/kauditd connection and move all the queued records into the
+ * hold queue in case auditd reconnects.
+ */
+static void auditd_reset(void)
+{
+	struct sk_buff *skb;
+
+	/* if it isn't already broken, break the connection */
+	rcu_read_lock();
+	if (auditd_conn.pid)
+		auditd_set(0, 0, NULL);
+	rcu_read_unlock();
+
+	/* flush all of the main and retry queues to the hold queue */
+	while ((skb = skb_dequeue(&audit_retry_queue)))
+		kauditd_hold_skb(skb);
+	while ((skb = skb_dequeue(&audit_queue)))
+		kauditd_hold_skb(skb);
+}
+
 /**
  * auditd_send_unicast_skb - Send a record via unicast to auditd
  * @skb: audit record
@@ -758,6 +751,7 @@ static int kauditd_thread(void *dummy)
 					NULL, kauditd_rehold_skb);
 		if (rc < 0) {
 			sk = NULL;
+			auditd_reset();
 			goto main_queue;
 		}
 
@@ -767,6 +761,7 @@ static int kauditd_thread(void *dummy)
 					NULL, kauditd_hold_skb);
 		if (rc < 0) {
 			sk = NULL;
+			auditd_reset();
 			goto main_queue;
 		}
 
@@ -775,16 +770,18 @@ main_queue:
 		 * unicast, dump failed record sends to the retry queue; if
 		 * sk == NULL due to previous failures we will just do the
 		 * multicast send and move the record to the retry queue */
-		kauditd_send_queue(sk, portid, &audit_queue, 1,
-				   kauditd_send_multicast_skb,
-				   kauditd_retry_skb);
+		rc = kauditd_send_queue(sk, portid, &audit_queue, 1,
+					kauditd_send_multicast_skb,
+					kauditd_retry_skb);
+		if (sk == NULL || rc < 0)
+			auditd_reset();
+		sk = NULL;
 
 		/* drop our netns reference, no auditd sends past this line */
 		if (net) {
 			put_net(net);
 			net = NULL;
 		}
-		sk = NULL;
 
 		/* we have processed all the queues so wake everyone */
 		wake_up(&audit_backlog_wait);

From c06989da39cdb10604d572c8c7ea8c8c97f3c483 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Mon, 10 Apr 2017 17:14:27 +0200
Subject: [PATCH 332/410] x86/vdso: Ensure vdso32_enabled gets set to valid
 values only

vdso_enabled can be set to arbitrary integer values via the kernel command
line 'vdso32=' parameter or via 'sysctl abi.vsyscall32'.

load_vdso32() only maps VDSO if vdso_enabled == 1, but ARCH_DLINFO_IA32
merily checks for vdso_enabled != 0. As a consequence the AT_SYSINFO_EHDR
auxiliary vector for the VDSO_ENTRY is emitted with a NULL pointer which
causes a segfault when the application tries to use the VDSO.

Restrict the valid arguments on the command line and the sysctl to 0 and 1.

Fixes: b0b49f2673f0 ("x86, vdso: Remove compat vdso support")
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Acked-by: Andy Lutomirski <luto@amacapital.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Cc: Roland McGrath <roland@redhat.com>
Link: http://lkml.kernel.org/r/1491424561-7187-1-git-send-email-minipli@googlemail.com
Link: http://lkml.kernel.org/r/20170410151723.518412863@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/entry/vdso/vdso32-setup.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c
index 7853b53959cd..3f9d1a83891a 100644
--- a/arch/x86/entry/vdso/vdso32-setup.c
+++ b/arch/x86/entry/vdso/vdso32-setup.c
@@ -30,8 +30,10 @@ static int __init vdso32_setup(char *s)
 {
 	vdso32_enabled = simple_strtoul(s, NULL, 0);
 
-	if (vdso32_enabled > 1)
+	if (vdso32_enabled > 1) {
 		pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n");
+		vdso32_enabled = 0;
+	}
 
 	return 1;
 }
@@ -62,13 +64,18 @@ subsys_initcall(sysenter_setup);
 /* Register vsyscall32 into the ABI table */
 #include <linux/sysctl.h>
 
+static const int zero;
+static const int one = 1;
+
 static struct ctl_table abi_table2[] = {
 	{
 		.procname	= "vsyscall32",
 		.data		= &vdso32_enabled,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (int *)&zero,
+		.extra2		= (int *)&one,
 	},
 	{}
 };

From 6fdc6dd90272ce7e75d744f71535cfbd8d77da81 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 10 Apr 2017 17:14:28 +0200
Subject: [PATCH 333/410] x86/vdso: Plug race between mapping and ELF header
 setup

The vsyscall32 sysctl can racy against a concurrent fork when it switches
from disabled to enabled:

    arch_setup_additional_pages()
	if (vdso32_enabled)
           --> No mapping
                                        sysctl.vsysscall32()
                                          --> vdso32_enabled = true
    create_elf_tables()
      ARCH_DLINFO_IA32
        if (vdso32_enabled) {
           --> Add VDSO entry with NULL pointer

Make ARCH_DLINFO_IA32 check whether the VDSO mapping has been set up for
the newly forked process or not.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@amacapital.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mathias Krause <minipli@googlemail.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20170410151723.602367196@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/elf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 9d49c18b5ea9..3762536619f8 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -287,7 +287,7 @@ struct task_struct;
 
 #define	ARCH_DLINFO_IA32						\
 do {									\
-	if (vdso32_enabled) {						\
+	if (VDSO_CURRENT_BASE) {					\
 		NEW_AUX_ENT(AT_SYSINFO,	VDSO_ENTRY);			\
 		NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE);	\
 	}								\

From 8f10d0149fb983aa84a02a92f9c2113d69e23cb8 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 4 Apr 2017 21:04:23 +0300
Subject: [PATCH 334/410] Revert "virtio_pci: fix out of bound access for
 msix_names"

This reverts commit de85ec8b07f82c8c84de7687f769e74bf4c26a1e.

Follow-up patches will revert 07ec51480b5e ("virtio_pci: use shared
interrupts for virtqueues") that triggered the problem so no need for
this one anymore.

Tested-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_pci_common.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 590534910dc6..df548a6fb844 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -147,7 +147,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	const char *name = dev_name(&vp_dev->vdev.dev);
-	int i, j, err = -ENOMEM, allocated_vectors, nvectors;
+	int i, err = -ENOMEM, allocated_vectors, nvectors;
 	unsigned flags = PCI_IRQ_MSIX;
 	bool shared = false;
 	u16 msix_vec;
@@ -212,7 +212,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 	if (!vp_dev->msix_vector_map)
 		goto out_disable_config_irq;
 
-	allocated_vectors = j = 1; /* vector 0 is the config interrupt */
+	allocated_vectors = 1; /* vector 0 is the config interrupt */
 	for (i = 0; i < nvqs; ++i) {
 		if (!names[i]) {
 			vqs[i] = NULL;
@@ -236,19 +236,18 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 			continue;
 		}
 
-		snprintf(vp_dev->msix_names[j],
+		snprintf(vp_dev->msix_names[i + 1],
 			 sizeof(*vp_dev->msix_names), "%s-%s",
 			 dev_name(&vp_dev->vdev.dev), names[i]);
 		err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec),
 				  vring_interrupt, IRQF_SHARED,
-				  vp_dev->msix_names[j], vqs[i]);
+				  vp_dev->msix_names[i + 1], vqs[i]);
 		if (err) {
 			/* don't free this irq on error */
 			vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR;
 			goto out_remove_vqs;
 		}
 		vp_dev->msix_vector_map[i] = msix_vec;
-		j++;
 
 		/*
 		 * Use a different vector for each queue if they are available,

From bf951b1045cc5d694b5e767c4f4d05df4f2cbe89 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 4 Apr 2017 21:08:54 +0300
Subject: [PATCH 335/410] Revert "virtio_pci: simplify MSI-X setup"

This reverts commit 52a61516125fa9a21b3bdf4f90928308e2e5573f.

Conflicts:
	drivers/virtio/virtio_pci_common.c

The cleanup seems to be one of the changes that broke
hybernation for some users. We are still not sure why
but revert helps.

This reverts the cleanup changes but keeps the affinity support.

Tested-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_pci_common.c | 32 ++++++++++++++----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index df548a6fb844..4608fd9aaa6c 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -143,13 +143,13 @@ void vp_del_vqs(struct virtio_device *vdev)
 
 static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 		struct virtqueue *vqs[], vq_callback_t *callbacks[],
-		const char * const names[], struct irq_affinity *desc)
+		const char * const names[], bool per_vq_vectors,
+		struct irq_affinity *desc)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	const char *name = dev_name(&vp_dev->vdev.dev);
 	int i, err = -ENOMEM, allocated_vectors, nvectors;
 	unsigned flags = PCI_IRQ_MSIX;
-	bool shared = false;
 	u16 msix_vec;
 
 	if (desc) {
@@ -162,16 +162,12 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 		if (callbacks[i])
 			nvectors++;
 
-	/* Try one vector per queue first. */
-	err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors,
-			nvectors, flags, desc);
-	if (err < 0) {
-		/* Fallback to one vector for config, one shared for queues. */
-		shared = true;
+	if (per_vq_vectors) {
+		err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors,
+				nvectors, flags, desc);
+	} else {
 		err = pci_alloc_irq_vectors(vp_dev->pci_dev, 2, 2,
 				PCI_IRQ_MSIX);
-		if (err < 0)
-			return err;
 	}
 	if (err < 0)
 		return err;
@@ -199,7 +195,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 	err = request_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_config_changed,
 			0, vp_dev->msix_names[0], vp_dev);
 	if (err)
-		goto out_free_msix_affinity_masks;
+		goto out_free_irq_vectors;
 
 	/* Verify we had enough resources to assign the vector */
 	if (vp_dev->config_vector(vp_dev, 0) == VIRTIO_MSI_NO_VECTOR) {
@@ -249,11 +245,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 		}
 		vp_dev->msix_vector_map[i] = msix_vec;
 
-		/*
-		 * Use a different vector for each queue if they are available,
-		 * else share the same vector for all VQs.
-		 */
-		if (!shared)
+		if (per_vq_vectors)
 			allocated_vectors++;
 	}
 
@@ -319,9 +311,15 @@ int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 {
 	int err;
 
-	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, desc);
+	/* Try MSI-X with one vector per queue. */
+	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, true, desc);
 	if (!err)
 		return 0;
+	/* Fallback: MSI-X with one vector for config, one shared for queues. */
+	err = vp_find_vqs_msix(vdev, nvqs, vqs, callbacks, names, false, desc);
+	if (!err)
+		return 0;
+	/* Finally fall back to regular interrupts. */
 	return vp_find_vqs_intx(vdev, nvqs, vqs, callbacks, names);
 }
 

From 2008c1544c73d5190f81ef1790fa5bd2fade5bd0 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 4 Apr 2017 21:09:20 +0300
Subject: [PATCH 336/410] Revert "virtio_pci: don't duplicate the msix_enable
 flag in struct pci_dev"

This reverts commit 53a020c661741f3b87ad3ac6fa545088aaebac9b.

The cleanup seems to be one of the changes that broke
hybernation for some users. We are still not sure why
but revert helps.

Tested-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_pci_common.c | 5 +++--
 drivers/virtio/virtio_pci_common.h | 2 ++
 drivers/virtio/virtio_pci_legacy.c | 2 +-
 drivers/virtio/virtio_pci_modern.c | 2 +-
 include/uapi/linux/virtio_pci.h    | 2 +-
 5 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 4608fd9aaa6c..3921b0a2439e 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -125,7 +125,7 @@ void vp_del_vqs(struct virtio_device *vdev)
 
 	vp_remove_vqs(vdev);
 
-	if (vp_dev->pci_dev->msix_enabled) {
+	if (vp_dev->msix_enabled) {
 		for (i = 0; i < vp_dev->msix_vectors; i++)
 			free_cpumask_var(vp_dev->msix_affinity_masks[i]);
 
@@ -249,6 +249,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 			allocated_vectors++;
 	}
 
+	vp_dev->msix_enabled = 1;
 	return 0;
 
 out_remove_vqs:
@@ -343,7 +344,7 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
 	if (!vq->callback)
 		return -EINVAL;
 
-	if (vp_dev->pci_dev->msix_enabled) {
+	if (vp_dev->msix_enabled) {
 		int vec = vp_dev->msix_vector_map[vq->index];
 		struct cpumask *mask = vp_dev->msix_affinity_masks[vec];
 		unsigned int irq = pci_irq_vector(vp_dev->pci_dev, vec);
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index ac8c9d788964..c8074997fd28 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -64,6 +64,8 @@ struct virtio_pci_device {
 	/* the IO mapping for the PCI config space */
 	void __iomem *ioaddr;
 
+	/* MSI-X support */
+	int msix_enabled;
 	cpumask_var_t *msix_affinity_masks;
 	/* Name strings for interrupts. This size should be enough,
 	 * and I'm too lazy to allocate each name separately. */
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
index f7362c5fe18a..5dd01f09608b 100644
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -165,7 +165,7 @@ static void del_vq(struct virtqueue *vq)
 
 	iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
 
-	if (vp_dev->pci_dev->msix_enabled) {
+	if (vp_dev->msix_enabled) {
 		iowrite16(VIRTIO_MSI_NO_VECTOR,
 			  vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
 		/* Flush the write out to device */
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index 7bc3004b840e..7ce36daccc31 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -411,7 +411,7 @@ static void del_vq(struct virtqueue *vq)
 
 	vp_iowrite16(vq->index, &vp_dev->common->queue_select);
 
-	if (vp_dev->pci_dev->msix_enabled) {
+	if (vp_dev->msix_enabled) {
 		vp_iowrite16(VIRTIO_MSI_NO_VECTOR,
 			     &vp_dev->common->queue_msix_vector);
 		/* Flush the write out to device */
diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h
index 15b4385a2be1..90007a1abcab 100644
--- a/include/uapi/linux/virtio_pci.h
+++ b/include/uapi/linux/virtio_pci.h
@@ -79,7 +79,7 @@
  * configuration space */
 #define VIRTIO_PCI_CONFIG_OFF(msix_enabled)	((msix_enabled) ? 24 : 20)
 /* Deprecated: please use VIRTIO_PCI_CONFIG_OFF instead */
-#define VIRTIO_PCI_CONFIG(dev)	VIRTIO_PCI_CONFIG_OFF((dev)->pci_dev->msix_enabled)
+#define VIRTIO_PCI_CONFIG(dev)	VIRTIO_PCI_CONFIG_OFF((dev)->msix_enabled)
 
 /* Virtio ABI version, this must match exactly */
 #define VIRTIO_PCI_ABI_VERSION		0

From 0b0f9dc52ed0333fa52a9314b53d0b2b248b821d Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 4 Apr 2017 21:15:41 +0300
Subject: [PATCH 337/410] Revert "virtio_pci: use shared interrupts for
 virtqueues"

This reverts commit 07ec51480b5eb1233f8c1b0f5d7a7c8d1247c507.

Conflicts:
	drivers/virtio/virtio_pci_common.c

Unfortunately the idea does not work with threadirqs
as more than 32 queues can then map to a single interrupts.

Further, the cleanup seems to be one of the changes that broke
hybernation for some users. We are still not sure why
but revert helps.

This reverts the cleanup changes but keeps the affinity support.

Tested-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_pci_common.c | 244 ++++++++++++++++-------------
 drivers/virtio/virtio_pci_common.h |  16 +-
 2 files changed, 148 insertions(+), 112 deletions(-)

diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 3921b0a2439e..d99029d3892e 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -33,8 +33,10 @@ void vp_synchronize_vectors(struct virtio_device *vdev)
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	int i;
 
-	synchronize_irq(pci_irq_vector(vp_dev->pci_dev, 0));
-	for (i = 1; i < vp_dev->msix_vectors; i++)
+	if (vp_dev->intx_enabled)
+		synchronize_irq(vp_dev->pci_dev->irq);
+
+	for (i = 0; i < vp_dev->msix_vectors; ++i)
 		synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i));
 }
 
@@ -97,10 +99,79 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
 	return vp_vring_interrupt(irq, opaque);
 }
 
-static void vp_remove_vqs(struct virtio_device *vdev)
+static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
+				   bool per_vq_vectors, struct irq_affinity *desc)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	const char *name = dev_name(&vp_dev->vdev.dev);
+	unsigned i, v;
+	int err = -ENOMEM;
+
+	vp_dev->msix_vectors = nvectors;
+
+	vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
+				     GFP_KERNEL);
+	if (!vp_dev->msix_names)
+		goto error;
+	vp_dev->msix_affinity_masks
+		= kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks,
+			  GFP_KERNEL);
+	if (!vp_dev->msix_affinity_masks)
+		goto error;
+	for (i = 0; i < nvectors; ++i)
+		if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
+					GFP_KERNEL))
+			goto error;
+
+	err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors,
+					     nvectors, PCI_IRQ_MSIX |
+					     (desc ? PCI_IRQ_AFFINITY : 0),
+					     desc);
+	if (err < 0)
+		goto error;
+	vp_dev->msix_enabled = 1;
+
+	/* Set the vector used for configuration */
+	v = vp_dev->msix_used_vectors;
+	snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
+		 "%s-config", name);
+	err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
+			  vp_config_changed, 0, vp_dev->msix_names[v],
+			  vp_dev);
+	if (err)
+		goto error;
+	++vp_dev->msix_used_vectors;
+
+	v = vp_dev->config_vector(vp_dev, v);
+	/* Verify we had enough resources to assign the vector */
+	if (v == VIRTIO_MSI_NO_VECTOR) {
+		err = -EBUSY;
+		goto error;
+	}
+
+	if (!per_vq_vectors) {
+		/* Shared vector for all VQs */
+		v = vp_dev->msix_used_vectors;
+		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
+			 "%s-virtqueues", name);
+		err = request_irq(pci_irq_vector(vp_dev->pci_dev, v),
+				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
+				  vp_dev);
+		if (err)
+			goto error;
+		++vp_dev->msix_used_vectors;
+	}
+	return 0;
+error:
+	return err;
+}
+
+/* the config->del_vqs() implementation */
+void vp_del_vqs(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	struct virtqueue *vq, *n;
+	int i;
 
 	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
 		if (vp_dev->msix_vector_map) {
@@ -112,33 +183,35 @@ static void vp_remove_vqs(struct virtio_device *vdev)
 		}
 		vp_dev->del_vq(vq);
 	}
-}
 
-/* the config->del_vqs() implementation */
-void vp_del_vqs(struct virtio_device *vdev)
-{
-	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	int i;
+	if (vp_dev->intx_enabled) {
+		free_irq(vp_dev->pci_dev->irq, vp_dev);
+		vp_dev->intx_enabled = 0;
+	}
 
-	if (WARN_ON_ONCE(list_empty_careful(&vdev->vqs)))
-		return;
+	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
+		free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev);
 
-	vp_remove_vqs(vdev);
-
-	if (vp_dev->msix_enabled) {
-		for (i = 0; i < vp_dev->msix_vectors; i++)
+	for (i = 0; i < vp_dev->msix_vectors; i++)
+		if (vp_dev->msix_affinity_masks[i])
 			free_cpumask_var(vp_dev->msix_affinity_masks[i]);
 
+	if (vp_dev->msix_enabled) {
 		/* Disable the vector used for configuration */
 		vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR);
 
-		kfree(vp_dev->msix_affinity_masks);
-		kfree(vp_dev->msix_names);
-		kfree(vp_dev->msix_vector_map);
+		pci_free_irq_vectors(vp_dev->pci_dev);
+		vp_dev->msix_enabled = 0;
 	}
 
-	free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev);
-	pci_free_irq_vectors(vp_dev->pci_dev);
+	vp_dev->msix_vectors = 0;
+	vp_dev->msix_used_vectors = 0;
+	kfree(vp_dev->msix_names);
+	vp_dev->msix_names = NULL;
+	kfree(vp_dev->msix_affinity_masks);
+	vp_dev->msix_affinity_masks = NULL;
+	kfree(vp_dev->msix_vector_map);
+	vp_dev->msix_vector_map = NULL;
 }
 
 static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
@@ -147,128 +220,80 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 		struct irq_affinity *desc)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	const char *name = dev_name(&vp_dev->vdev.dev);
-	int i, err = -ENOMEM, allocated_vectors, nvectors;
-	unsigned flags = PCI_IRQ_MSIX;
 	u16 msix_vec;
-
-	if (desc) {
-		flags |= PCI_IRQ_AFFINITY;
-		desc->pre_vectors++; /* virtio config vector */
-	}
-
-	nvectors = 1;
-	for (i = 0; i < nvqs; i++)
-		if (callbacks[i])
-			nvectors++;
+	int i, err, nvectors, allocated_vectors;
 
 	if (per_vq_vectors) {
-		err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors,
-				nvectors, flags, desc);
+		/* Best option: one for change interrupt, one per vq. */
+		nvectors = 1;
+		for (i = 0; i < nvqs; ++i)
+			if (callbacks[i])
+				++nvectors;
 	} else {
-		err = pci_alloc_irq_vectors(vp_dev->pci_dev, 2, 2,
-				PCI_IRQ_MSIX);
-	}
-	if (err < 0)
-		return err;
-
-	vp_dev->msix_vectors = nvectors;
-	vp_dev->msix_names = kmalloc_array(nvectors,
-			sizeof(*vp_dev->msix_names), GFP_KERNEL);
-	if (!vp_dev->msix_names)
-		goto out_free_irq_vectors;
-
-	vp_dev->msix_affinity_masks = kcalloc(nvectors,
-			sizeof(*vp_dev->msix_affinity_masks), GFP_KERNEL);
-	if (!vp_dev->msix_affinity_masks)
-		goto out_free_msix_names;
-
-	for (i = 0; i < nvectors; ++i) {
-		if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
-				GFP_KERNEL))
-			goto out_free_msix_affinity_masks;
+		/* Second best: one for change, shared for all vqs. */
+		nvectors = 2;
 	}
 
-	/* Set the vector used for configuration */
-	snprintf(vp_dev->msix_names[0], sizeof(*vp_dev->msix_names),
-		 "%s-config", name);
-	err = request_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_config_changed,
-			0, vp_dev->msix_names[0], vp_dev);
+	err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors,
+				      per_vq_vectors ? desc : NULL);
 	if (err)
-		goto out_free_irq_vectors;
+		goto error_find;
 
-	/* Verify we had enough resources to assign the vector */
-	if (vp_dev->config_vector(vp_dev, 0) == VIRTIO_MSI_NO_VECTOR) {
-		err = -EBUSY;
-		goto out_free_config_irq;
+	if (per_vq_vectors) {
+		vp_dev->msix_vector_map = kmalloc_array(nvqs,
+				sizeof(*vp_dev->msix_vector_map), GFP_KERNEL);
+		if (!vp_dev->msix_vector_map)
+			goto error_find;
 	}
 
-	vp_dev->msix_vector_map = kmalloc_array(nvqs,
-			sizeof(*vp_dev->msix_vector_map), GFP_KERNEL);
-	if (!vp_dev->msix_vector_map)
-		goto out_disable_config_irq;
-
-	allocated_vectors = 1; /* vector 0 is the config interrupt */
+	allocated_vectors = vp_dev->msix_used_vectors;
 	for (i = 0; i < nvqs; ++i) {
 		if (!names[i]) {
 			vqs[i] = NULL;
 			continue;
 		}
 
-		if (callbacks[i])
-			msix_vec = allocated_vectors;
-		else
+		if (!callbacks[i])
 			msix_vec = VIRTIO_MSI_NO_VECTOR;
-
+		else if (per_vq_vectors)
+			msix_vec = allocated_vectors++;
+		else
+			msix_vec = VP_MSIX_VQ_VECTOR;
 		vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i],
 				msix_vec);
 		if (IS_ERR(vqs[i])) {
 			err = PTR_ERR(vqs[i]);
-			goto out_remove_vqs;
+			goto error_find;
 		}
 
+		if (!per_vq_vectors)
+			continue;
+
 		if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
 			vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR;
 			continue;
 		}
 
-		snprintf(vp_dev->msix_names[i + 1],
-			 sizeof(*vp_dev->msix_names), "%s-%s",
+		/* allocate per-vq irq if available and necessary */
+		snprintf(vp_dev->msix_names[msix_vec],
+			 sizeof *vp_dev->msix_names,
+			 "%s-%s",
 			 dev_name(&vp_dev->vdev.dev), names[i]);
 		err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec),
-				  vring_interrupt, IRQF_SHARED,
-				  vp_dev->msix_names[i + 1], vqs[i]);
+				  vring_interrupt, 0,
+				  vp_dev->msix_names[msix_vec],
+				  vqs[i]);
 		if (err) {
 			/* don't free this irq on error */
 			vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR;
-			goto out_remove_vqs;
+			goto error_find;
 		}
 		vp_dev->msix_vector_map[i] = msix_vec;
-
-		if (per_vq_vectors)
-			allocated_vectors++;
 	}
-
-	vp_dev->msix_enabled = 1;
 	return 0;
 
-out_remove_vqs:
-	vp_remove_vqs(vdev);
-	kfree(vp_dev->msix_vector_map);
-out_disable_config_irq:
-	vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR);
-out_free_config_irq:
-	free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev);
-out_free_msix_affinity_masks:
-	for (i = 0; i < nvectors; i++) {
-		if (vp_dev->msix_affinity_masks[i])
-			free_cpumask_var(vp_dev->msix_affinity_masks[i]);
-	}
-	kfree(vp_dev->msix_affinity_masks);
-out_free_msix_names:
-	kfree(vp_dev->msix_names);
-out_free_irq_vectors:
-	pci_free_irq_vectors(vp_dev->pci_dev);
+error_find:
+	vp_del_vqs(vdev);
 	return err;
 }
 
@@ -282,8 +307,9 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
 	err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED,
 			dev_name(&vdev->dev), vp_dev);
 	if (err)
-		return err;
+		goto out_del_vqs;
 
+	vp_dev->intx_enabled = 1;
 	for (i = 0; i < nvqs; ++i) {
 		if (!names[i]) {
 			vqs[i] = NULL;
@@ -293,15 +319,13 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
 				VIRTIO_MSI_NO_VECTOR);
 		if (IS_ERR(vqs[i])) {
 			err = PTR_ERR(vqs[i]);
-			goto out_remove_vqs;
+			goto out_del_vqs;
 		}
 	}
 
 	return 0;
-
-out_remove_vqs:
-	vp_remove_vqs(vdev);
-	free_irq(pci_irq_vector(vp_dev->pci_dev, 0), vp_dev);
+out_del_vqs:
+	vp_del_vqs(vdev);
 	return err;
 }
 
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index c8074997fd28..3cdabba8415e 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -66,12 +66,16 @@ struct virtio_pci_device {
 
 	/* MSI-X support */
 	int msix_enabled;
+	int intx_enabled;
 	cpumask_var_t *msix_affinity_masks;
 	/* Name strings for interrupts. This size should be enough,
 	 * and I'm too lazy to allocate each name separately. */
 	char (*msix_names)[256];
-	/* Total Number of MSI-X vectors (including per-VQ ones). */
-	int msix_vectors;
+	/* Number of available vectors */
+	unsigned msix_vectors;
+	/* Vectors allocated, excluding per-vq vectors if any */
+	unsigned msix_used_vectors;
+
 	/* Map of per-VQ MSI-X vectors, may be NULL */
 	unsigned *msix_vector_map;
 
@@ -85,6 +89,14 @@ struct virtio_pci_device {
 	u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector);
 };
 
+/* Constants for MSI-X */
+/* Use first vector for configuration changes, second and the rest for
+ * virtqueues Thus, we need at least 2 vectors for MSI. */
+enum {
+	VP_MSIX_CONFIG_VECTOR = 0,
+	VP_MSIX_VQ_VECTOR = 1,
+};
+
 /* Convert a generic virtio device to our structure */
 static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
 {

From 0a9b3f47da5b8a2c4bf4f2f2199761f49ac0a54e Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 4 Apr 2017 21:44:44 +0300
Subject: [PATCH 338/410] Revert "virtio_pci: remove struct virtio_pci_vq_info"

This reverts commit 5c34d002dcc7a6dd665a19d098b4f4cd5501ba1a.

Conflicts:
	drivers/virtio/virtio_pci_common.c

The cleanup seems to be one of the changes that broke
hybernation for some users. We are still not sure why
but revert helps.

This reverts the cleanup changes but keeps the affinity support.

Tested-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_pci_common.c | 128 ++++++++++++++++++++---------
 drivers/virtio/virtio_pci_common.h |  25 +++++-
 drivers/virtio/virtio_pci_legacy.c |   6 +-
 drivers/virtio/virtio_pci_modern.c |   6 +-
 4 files changed, 122 insertions(+), 43 deletions(-)

diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index d99029d3892e..e41bff3ec79b 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -62,13 +62,16 @@ static irqreturn_t vp_config_changed(int irq, void *opaque)
 static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
 {
 	struct virtio_pci_device *vp_dev = opaque;
+	struct virtio_pci_vq_info *info;
 	irqreturn_t ret = IRQ_NONE;
-	struct virtqueue *vq;
+	unsigned long flags;
 
-	list_for_each_entry(vq, &vp_dev->vdev.vqs, list) {
-		if (vq->callback && vring_interrupt(irq, vq) == IRQ_HANDLED)
+	spin_lock_irqsave(&vp_dev->lock, flags);
+	list_for_each_entry(info, &vp_dev->virtqueues, node) {
+		if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
 			ret = IRQ_HANDLED;
 	}
+	spin_unlock_irqrestore(&vp_dev->lock, flags);
 
 	return ret;
 }
@@ -166,6 +169,56 @@ error:
 	return err;
 }
 
+static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
+				     void (*callback)(struct virtqueue *vq),
+				     const char *name,
+				     u16 msix_vec)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL);
+	struct virtqueue *vq;
+	unsigned long flags;
+
+	/* fill out our structure that represents an active queue */
+	if (!info)
+		return ERR_PTR(-ENOMEM);
+
+	vq = vp_dev->setup_vq(vp_dev, info, index, callback, name,
+			      msix_vec);
+	if (IS_ERR(vq))
+		goto out_info;
+
+	info->vq = vq;
+	if (callback) {
+		spin_lock_irqsave(&vp_dev->lock, flags);
+		list_add(&info->node, &vp_dev->virtqueues);
+		spin_unlock_irqrestore(&vp_dev->lock, flags);
+	} else {
+		INIT_LIST_HEAD(&info->node);
+	}
+
+	vp_dev->vqs[index] = info;
+	return vq;
+
+out_info:
+	kfree(info);
+	return vq;
+}
+
+static void vp_del_vq(struct virtqueue *vq)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
+	struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
+	unsigned long flags;
+
+	spin_lock_irqsave(&vp_dev->lock, flags);
+	list_del(&info->node);
+	spin_unlock_irqrestore(&vp_dev->lock, flags);
+
+	vp_dev->del_vq(info);
+	kfree(info);
+}
+
 /* the config->del_vqs() implementation */
 void vp_del_vqs(struct virtio_device *vdev)
 {
@@ -174,15 +227,16 @@ void vp_del_vqs(struct virtio_device *vdev)
 	int i;
 
 	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
-		if (vp_dev->msix_vector_map) {
-			int v = vp_dev->msix_vector_map[vq->index];
+		if (vp_dev->per_vq_vectors) {
+			int v = vp_dev->vqs[vq->index]->msix_vector;
 
 			if (v != VIRTIO_MSI_NO_VECTOR)
 				free_irq(pci_irq_vector(vp_dev->pci_dev, v),
 					vq);
 		}
-		vp_dev->del_vq(vq);
+		vp_del_vq(vq);
 	}
+	vp_dev->per_vq_vectors = false;
 
 	if (vp_dev->intx_enabled) {
 		free_irq(vp_dev->pci_dev->irq, vp_dev);
@@ -210,8 +264,8 @@ void vp_del_vqs(struct virtio_device *vdev)
 	vp_dev->msix_names = NULL;
 	kfree(vp_dev->msix_affinity_masks);
 	vp_dev->msix_affinity_masks = NULL;
-	kfree(vp_dev->msix_vector_map);
-	vp_dev->msix_vector_map = NULL;
+	kfree(vp_dev->vqs);
+	vp_dev->vqs = NULL;
 }
 
 static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
@@ -223,6 +277,10 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 	u16 msix_vec;
 	int i, err, nvectors, allocated_vectors;
 
+	vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
+	if (!vp_dev->vqs)
+		return -ENOMEM;
+
 	if (per_vq_vectors) {
 		/* Best option: one for change interrupt, one per vq. */
 		nvectors = 1;
@@ -239,13 +297,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 	if (err)
 		goto error_find;
 
-	if (per_vq_vectors) {
-		vp_dev->msix_vector_map = kmalloc_array(nvqs,
-				sizeof(*vp_dev->msix_vector_map), GFP_KERNEL);
-		if (!vp_dev->msix_vector_map)
-			goto error_find;
-	}
-
+	vp_dev->per_vq_vectors = per_vq_vectors;
 	allocated_vectors = vp_dev->msix_used_vectors;
 	for (i = 0; i < nvqs; ++i) {
 		if (!names[i]) {
@@ -255,25 +307,20 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 
 		if (!callbacks[i])
 			msix_vec = VIRTIO_MSI_NO_VECTOR;
-		else if (per_vq_vectors)
+		else if (vp_dev->per_vq_vectors)
 			msix_vec = allocated_vectors++;
 		else
 			msix_vec = VP_MSIX_VQ_VECTOR;
-		vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i],
-				msix_vec);
+		vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
+				     msix_vec);
 		if (IS_ERR(vqs[i])) {
 			err = PTR_ERR(vqs[i]);
 			goto error_find;
 		}
 
-		if (!per_vq_vectors)
+		if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
 			continue;
 
-		if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
-			vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR;
-			continue;
-		}
-
 		/* allocate per-vq irq if available and necessary */
 		snprintf(vp_dev->msix_names[msix_vec],
 			 sizeof *vp_dev->msix_names,
@@ -283,12 +330,8 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs,
 				  vring_interrupt, 0,
 				  vp_dev->msix_names[msix_vec],
 				  vqs[i]);
-		if (err) {
-			/* don't free this irq on error */
-			vp_dev->msix_vector_map[i] = VIRTIO_MSI_NO_VECTOR;
+		if (err)
 			goto error_find;
-		}
-		vp_dev->msix_vector_map[i] = msix_vec;
 	}
 	return 0;
 
@@ -304,19 +347,24 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned nvqs,
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	int i, err;
 
+	vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
+	if (!vp_dev->vqs)
+		return -ENOMEM;
+
 	err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED,
 			dev_name(&vdev->dev), vp_dev);
 	if (err)
 		goto out_del_vqs;
 
 	vp_dev->intx_enabled = 1;
+	vp_dev->per_vq_vectors = false;
 	for (i = 0; i < nvqs; ++i) {
 		if (!names[i]) {
 			vqs[i] = NULL;
 			continue;
 		}
-		vqs[i] = vp_dev->setup_vq(vp_dev, i, callbacks[i], names[i],
-				VIRTIO_MSI_NO_VECTOR);
+		vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i],
+				     VIRTIO_MSI_NO_VECTOR);
 		if (IS_ERR(vqs[i])) {
 			err = PTR_ERR(vqs[i]);
 			goto out_del_vqs;
@@ -364,15 +412,16 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
 {
 	struct virtio_device *vdev = vq->vdev;
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+	struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
+	struct cpumask *mask;
+	unsigned int irq;
 
 	if (!vq->callback)
 		return -EINVAL;
 
 	if (vp_dev->msix_enabled) {
-		int vec = vp_dev->msix_vector_map[vq->index];
-		struct cpumask *mask = vp_dev->msix_affinity_masks[vec];
-		unsigned int irq = pci_irq_vector(vp_dev->pci_dev, vec);
-
+		mask = vp_dev->msix_affinity_masks[info->msix_vector];
+		irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector);
 		if (cpu == -1)
 			irq_set_affinity_hint(irq, NULL);
 		else {
@@ -387,12 +436,13 @@ int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
 const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	unsigned int *map = vp_dev->msix_vector_map;
 
-	if (!map || map[index] == VIRTIO_MSI_NO_VECTOR)
+	if (!vp_dev->per_vq_vectors ||
+	    vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR)
 		return NULL;
 
-	return pci_irq_get_affinity(vp_dev->pci_dev, map[index]);
+	return pci_irq_get_affinity(vp_dev->pci_dev,
+				    vp_dev->vqs[index]->msix_vector);
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -463,6 +513,8 @@ static int virtio_pci_probe(struct pci_dev *pci_dev,
 	vp_dev->vdev.dev.parent = &pci_dev->dev;
 	vp_dev->vdev.dev.release = virtio_pci_release_dev;
 	vp_dev->pci_dev = pci_dev;
+	INIT_LIST_HEAD(&vp_dev->virtqueues);
+	spin_lock_init(&vp_dev->lock);
 
 	/* enable the device */
 	rc = pci_enable_device(pci_dev);
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index 3cdabba8415e..e96334aec1e0 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -31,6 +31,17 @@
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
 
+struct virtio_pci_vq_info {
+	/* the actual virtqueue */
+	struct virtqueue *vq;
+
+	/* the list node for the virtqueues list */
+	struct list_head node;
+
+	/* MSI-X vector (or none) */
+	unsigned msix_vector;
+};
+
 /* Our device structure */
 struct virtio_pci_device {
 	struct virtio_device vdev;
@@ -64,6 +75,13 @@ struct virtio_pci_device {
 	/* the IO mapping for the PCI config space */
 	void __iomem *ioaddr;
 
+	/* a list of queues so we can dispatch IRQs */
+	spinlock_t lock;
+	struct list_head virtqueues;
+
+	/* array of all queues for house-keeping */
+	struct virtio_pci_vq_info **vqs;
+
 	/* MSI-X support */
 	int msix_enabled;
 	int intx_enabled;
@@ -76,15 +94,16 @@ struct virtio_pci_device {
 	/* Vectors allocated, excluding per-vq vectors if any */
 	unsigned msix_used_vectors;
 
-	/* Map of per-VQ MSI-X vectors, may be NULL */
-	unsigned *msix_vector_map;
+	/* Whether we have vector per vq */
+	bool per_vq_vectors;
 
 	struct virtqueue *(*setup_vq)(struct virtio_pci_device *vp_dev,
+				      struct virtio_pci_vq_info *info,
 				      unsigned idx,
 				      void (*callback)(struct virtqueue *vq),
 				      const char *name,
 				      u16 msix_vec);
-	void (*del_vq)(struct virtqueue *vq);
+	void (*del_vq)(struct virtio_pci_vq_info *info);
 
 	u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector);
 };
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
index 5dd01f09608b..4bfa48fb1324 100644
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -112,6 +112,7 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
 }
 
 static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
+				  struct virtio_pci_vq_info *info,
 				  unsigned index,
 				  void (*callback)(struct virtqueue *vq),
 				  const char *name,
@@ -129,6 +130,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 	if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN))
 		return ERR_PTR(-ENOENT);
 
+	info->msix_vector = msix_vec;
+
 	/* create the vring */
 	vq = vring_create_virtqueue(index, num,
 				    VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev,
@@ -159,8 +162,9 @@ out_deactivate:
 	return ERR_PTR(err);
 }
 
-static void del_vq(struct virtqueue *vq)
+static void del_vq(struct virtio_pci_vq_info *info)
 {
+	struct virtqueue *vq = info->vq;
 	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
 
 	iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index 7ce36daccc31..8978f109d2d7 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -293,6 +293,7 @@ static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
 }
 
 static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
+				  struct virtio_pci_vq_info *info,
 				  unsigned index,
 				  void (*callback)(struct virtqueue *vq),
 				  const char *name,
@@ -322,6 +323,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 	/* get offset of notification word for this vq */
 	off = vp_ioread16(&cfg->queue_notify_off);
 
+	info->msix_vector = msix_vec;
+
 	/* create the vring */
 	vq = vring_create_virtqueue(index, num,
 				    SMP_CACHE_BYTES, &vp_dev->vdev,
@@ -405,8 +408,9 @@ static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 	return 0;
 }
 
-static void del_vq(struct virtqueue *vq)
+static void del_vq(struct virtio_pci_vq_info *info)
 {
+	struct virtqueue *vq = info->vq;
 	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
 
 	vp_iowrite16(vq->index, &vp_dev->common->queue_select);

From 2f8dc3a01f1cf8ed17b1e295812ad12b688be5d3 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 8 Mar 2017 08:09:27 +0000
Subject: [PATCH 339/410] virtio-pci: Remove affinity hint before freeing the
 interrupt

virtio-pci registers a per-vq affinity hint when using MSIX,
but fails to remove it when freeing the interrupt, resulting
in this type of splat:

[   31.111202] WARNING: CPU: 0 PID: 2823 at kernel/irq/manage.c:1503 __free_irq+0x2c4/0x2c8
[   31.114689] Modules linked in:
[   31.116101] CPU: 0 PID: 2823 Comm: kexec Not tainted 4.10.0+ #6941
[   31.118911] Hardware name: Generic DT based system
[   31.121319] [<c022fb78>] (unwind_backtrace) from [<c0229d8c>] (show_stack+0x18/0x1c)
[   31.125017] [<c0229d8c>] (show_stack) from [<c05192f4>] (dump_stack+0x84/0x98)
[   31.128427] [<c05192f4>] (dump_stack) from [<c023d940>] (__warn+0xf4/0x10c)
[   31.131910] [<c023d940>] (__warn) from [<c023da20>] (warn_slowpath_null+0x28/0x30)
[   31.135543] [<c023da20>] (warn_slowpath_null) from [<c0290238>] (__free_irq+0x2c4/0x2c8)
[   31.139355] [<c0290238>] (__free_irq) from [<c02902d0>] (free_irq+0x44/0x78)
[   31.142909] [<c02902d0>] (free_irq) from [<c059d3a8>] (vp_del_vqs+0x68/0x1c0)
[   31.146299] [<c059d3a8>] (vp_del_vqs) from [<c056ca4c>] (pci_device_shutdown+0x3c/0x78)

The obvious fix is to drop the affinity hint before freeing the
interrupt.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_pci_common.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index e41bff3ec79b..698d5d06fa03 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -230,9 +230,12 @@ void vp_del_vqs(struct virtio_device *vdev)
 		if (vp_dev->per_vq_vectors) {
 			int v = vp_dev->vqs[vq->index]->msix_vector;
 
-			if (v != VIRTIO_MSI_NO_VECTOR)
-				free_irq(pci_irq_vector(vp_dev->pci_dev, v),
-					vq);
+			if (v != VIRTIO_MSI_NO_VECTOR) {
+				int irq = pci_irq_vector(vp_dev->pci_dev, v);
+
+				irq_set_affinity_hint(irq, NULL);
+				free_irq(irq, vq);
+			}
 		}
 		vp_del_vq(vq);
 	}

From bfb0b80db5f9dca5ac0a5fd0edb765ee555e5a8e Mon Sep 17 00:00:00 2001
From: Zefan Li <lizefan@huawei.com>
Date: Fri, 7 Apr 2017 16:51:55 +0800
Subject: [PATCH 340/410] cgroup: avoid attaching a cgroup root to two
 different superblocks

Run this:

    touch file0
    for ((; ;))
    {
        mount -t cpuset xxx file0
    }

And this concurrently:

    touch file1
    for ((; ;))
    {
        mount -t cpuset xxx file1
    }

We'll trigger a warning like this:

 ------------[ cut here ]------------
 WARNING: CPU: 1 PID: 4675 at lib/percpu-refcount.c:317 percpu_ref_kill_and_confirm+0x92/0xb0
 percpu_ref_kill_and_confirm called more than once on css_release!
 CPU: 1 PID: 4675 Comm: mount Not tainted 4.11.0-rc5+ #5
 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
 Call Trace:
  dump_stack+0x63/0x84
  __warn+0xd1/0xf0
  warn_slowpath_fmt+0x5f/0x80
  percpu_ref_kill_and_confirm+0x92/0xb0
  cgroup_kill_sb+0x95/0xb0
  deactivate_locked_super+0x43/0x70
  deactivate_super+0x46/0x60
 ...
 ---[ end trace a79f61c2a2633700 ]---

Here's a race:

  Thread A				Thread B

  cgroup1_mount()
    # alloc a new cgroup root
    cgroup_setup_root()
					cgroup1_mount()
					  # no sb yet, returns NULL
					  kernfs_pin_sb()

					  # but succeeds in getting the refcnt,
					  # so re-use cgroup root
					  percpu_ref_tryget_live()
    # alloc sb with cgroup root
    cgroup_do_mount()

  cgroup_kill_sb()
					  # alloc another sb with same root
					  cgroup_do_mount()

					cgroup_kill_sb()

We end up using the same cgroup root for two different superblocks,
so percpu_ref_kill() will be called twice on the same root when the
two superblocks are destroyed.

We should fix to make sure the superblock pinning is really successful.

Cc: stable@vger.kernel.org # 3.16+
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/cgroup/cgroup-v1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 1dc22f6b49f5..12e19f0636ea 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1146,7 +1146,7 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
 		 * path is super cold.  Let's just sleep a bit and retry.
 		 */
 		pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
-		if (IS_ERR(pinned_sb) ||
+		if (IS_ERR_OR_NULL(pinned_sb) ||
 		    !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
 			mutex_unlock(&cgroup_mutex);
 			if (!IS_ERR_OR_NULL(pinned_sb))

From 3cf864520e877505158f09075794a08abab11bbe Mon Sep 17 00:00:00 2001
From: Ondrej Zary <linux@rainbow-software.org>
Date: Fri, 31 Mar 2017 20:35:42 +0200
Subject: [PATCH 341/410] sata_via: Enable hotplug only on VT6421

Commit 57e5568fda27 ("sata_via: Implement hotplug for VT6421") adds
hotplug IRQ handler for VT6421 but enables hotplug on all chips. This
is a bug because it causes "irq xx: nobody cared" error on VT6420 when
hot-(un)plugging a drive:

[  381.839948] irq 20: nobody cared (try booting with the "irqpoll" option)
[  381.840014] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.10.0-rc5+ #148
[  381.840066] Hardware name:          P4VM800/P4VM800, BIOS P1.60 05/29/2006
[  381.840117] Call Trace:
[  381.840167]  <IRQ>
[  381.840225]  ? dump_stack+0x44/0x58
[  381.840278]  ? __report_bad_irq+0x14/0x97
[  381.840327]  ? handle_edge_irq+0xa5/0xa5
[  381.840376]  ? note_interrupt+0x155/0x1cf
[  381.840426]  ? handle_edge_irq+0xa5/0xa5
[  381.840474]  ? handle_irq_event_percpu+0x32/0x38
[  381.840524]  ? handle_irq_event+0x1f/0x38
[  381.840573]  ? handle_fasteoi_irq+0x69/0xb8
[  381.840625]  ? handle_irq+0x4f/0x5d
[  381.840672]  </IRQ>
[  381.840726]  ? do_IRQ+0x2e/0x8b
[  381.840782]  ? common_interrupt+0x2c/0x34
[  381.840836]  ? mwait_idle+0x60/0x82
[  381.840892]  ? arch_cpu_idle+0x6/0x7
[  381.840949]  ? do_idle+0x96/0x18e
[  381.841002]  ? cpu_startup_entry+0x16/0x1a
[  381.841057]  ? start_kernel+0x319/0x31c
[  381.841111]  ? startup_32_smp+0x166/0x168
[  381.841165] handlers:
[  381.841219] [<c12a7263>] ata_bmdma_interrupt
[  381.841274] Disabling IRQ #20

Seems that VT6420 can do hotplug too (there's no documentation) but the
comments say that SCR register access (required for detecting hotplug
events) can cause problems on these chips.

For now, just keep hotplug disabled on anything other than VT6421.

Signed-off-by: Ondrej Zary <linux@rainbow-software.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/sata_via.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c
index 0636d84fbefe..f3f538eec7b3 100644
--- a/drivers/ata/sata_via.c
+++ b/drivers/ata/sata_via.c
@@ -644,14 +644,16 @@ static void svia_configure(struct pci_dev *pdev, int board_id,
 		pci_write_config_byte(pdev, SATA_NATIVE_MODE, tmp8);
 	}
 
-	/* enable IRQ on hotplug */
-	pci_read_config_byte(pdev, SVIA_MISC_3, &tmp8);
-	if ((tmp8 & SATA_HOTPLUG) != SATA_HOTPLUG) {
-		dev_dbg(&pdev->dev,
-			"enabling SATA hotplug (0x%x)\n",
-			(int) tmp8);
-		tmp8 |= SATA_HOTPLUG;
-		pci_write_config_byte(pdev, SVIA_MISC_3, tmp8);
+	if (board_id == vt6421) {
+		/* enable IRQ on hotplug */
+		pci_read_config_byte(pdev, SVIA_MISC_3, &tmp8);
+		if ((tmp8 & SATA_HOTPLUG) != SATA_HOTPLUG) {
+			dev_dbg(&pdev->dev,
+				"enabling SATA hotplug (0x%x)\n",
+				(int) tmp8);
+			tmp8 |= SATA_HOTPLUG;
+			pci_write_config_byte(pdev, SVIA_MISC_3, tmp8);
+		}
 	}
 
 	/*

From 0beb2012a1722633515c8aaa263c73449636c893 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 7 Apr 2017 09:47:24 -0700
Subject: [PATCH 342/410] libnvdimm: fix reconfig_mutex, mmap_sem, and
 jbd2_handle lockdep splat

Holding the reconfig_mutex over a potential userspace fault sets up a
lockdep dependency chain between filesystem-DAX and the libnvdimm ioctl
path. Move the user access outside of the lock.

     [ INFO: possible circular locking dependency detected ]
     4.11.0-rc3+ #13 Tainted: G        W  O
     -------------------------------------------------------
     fallocate/16656 is trying to acquire lock:
      (&nvdimm_bus->reconfig_mutex){+.+.+.}, at: [<ffffffffa00080b1>] nvdimm_bus_lock+0x21/0x30 [libnvdimm]
     but task is already holding lock:
      (jbd2_handle){++++..}, at: [<ffffffff813b4944>] start_this_handle+0x104/0x460

    which lock already depends on the new lock.

    the existing dependency chain (in reverse order) is:

    -> #2 (jbd2_handle){++++..}:
            lock_acquire+0xbd/0x200
            start_this_handle+0x16a/0x460
            jbd2__journal_start+0xe9/0x2d0
            __ext4_journal_start_sb+0x89/0x1c0
            ext4_dirty_inode+0x32/0x70
            __mark_inode_dirty+0x235/0x670
            generic_update_time+0x87/0xd0
            touch_atime+0xa9/0xd0
            ext4_file_mmap+0x90/0xb0
            mmap_region+0x370/0x5b0
            do_mmap+0x415/0x4f0
            vm_mmap_pgoff+0xd7/0x120
            SyS_mmap_pgoff+0x1c5/0x290
            SyS_mmap+0x22/0x30
            entry_SYSCALL_64_fastpath+0x1f/0xc2

    -> #1 (&mm->mmap_sem){++++++}:
            lock_acquire+0xbd/0x200
            __might_fault+0x70/0xa0
            __nd_ioctl+0x683/0x720 [libnvdimm]
            nvdimm_ioctl+0x8b/0xe0 [libnvdimm]
            do_vfs_ioctl+0xa8/0x740
            SyS_ioctl+0x79/0x90
            do_syscall_64+0x6c/0x200
            return_from_SYSCALL_64+0x0/0x7a

    -> #0 (&nvdimm_bus->reconfig_mutex){+.+.+.}:
            __lock_acquire+0x16b6/0x1730
            lock_acquire+0xbd/0x200
            __mutex_lock+0x88/0x9b0
            mutex_lock_nested+0x1b/0x20
            nvdimm_bus_lock+0x21/0x30 [libnvdimm]
            nvdimm_forget_poison+0x25/0x50 [libnvdimm]
            nvdimm_clear_poison+0x106/0x140 [libnvdimm]
            pmem_do_bvec+0x1c2/0x2b0 [nd_pmem]
            pmem_make_request+0xf9/0x270 [nd_pmem]
            generic_make_request+0x118/0x3b0
            submit_bio+0x75/0x150

Cc: <stable@vger.kernel.org>
Fixes: 62232e45f4a2 ("libnvdimm: control (ioctl) messages for nvdimm_bus and nvdimm devices")
Cc: Dave Jiang <dave.jiang@intel.com>
Reported-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/bus.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 23d4a1728cdf..351bac8f6503 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -934,8 +934,14 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 	rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, NULL);
 	if (rc < 0)
 		goto out_unlock;
+	nvdimm_bus_unlock(&nvdimm_bus->dev);
+
 	if (copy_to_user(p, buf, buf_len))
 		rc = -EFAULT;
+
+	vfree(buf);
+	return rc;
+
  out_unlock:
 	nvdimm_bus_unlock(&nvdimm_bus->dev);
  out:

From 4aa5615e080a9855e607accc75b07ab79b252dde Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 7 Apr 2017 12:25:52 -0700
Subject: [PATCH 343/410] libnvdimm: band aid btt vs clear poison locking

The following warning results from holding a lane spinlock,
preempt_disable(), or the btt map spinlock and then trying to take the
reconfig_mutex to walk the poison list and potentially add new entries.

 BUG: sleeping function called from invalid context at kernel/locking/mutex.c:747
 in_atomic(): 1, irqs_disabled(): 0, pid: 17159, name: dd
 [..]
 Call Trace:
  dump_stack+0x85/0xc8
  ___might_sleep+0x184/0x250
  __might_sleep+0x4a/0x90
  __mutex_lock+0x58/0x9b0
  ? nvdimm_bus_lock+0x21/0x30 [libnvdimm]
  ? __nvdimm_bus_badblocks_clear+0x2f/0x60 [libnvdimm]
  ? acpi_nfit_forget_poison+0x79/0x80 [nfit]
  ? _raw_spin_unlock+0x27/0x40
  mutex_lock_nested+0x1b/0x20
  nvdimm_bus_lock+0x21/0x30 [libnvdimm]
  nvdimm_forget_poison+0x25/0x50 [libnvdimm]
  nvdimm_clear_poison+0x106/0x140 [libnvdimm]
  nsio_rw_bytes+0x164/0x270 [libnvdimm]
  btt_write_pg+0x1de/0x3e0 [nd_btt]
  ? blk_queue_enter+0x30/0x290
  btt_make_request+0x11a/0x310 [nd_btt]
  ? blk_queue_enter+0xb7/0x290
  ? blk_queue_enter+0x30/0x290
  generic_make_request+0x118/0x3b0

As a minimal fix, disable error clearing when the BTT is enabled for the
namespace. For the final fix a larger rework of the poison list locking
is needed.

Note that this is not a problem in the blk case since that path never
calls nvdimm_clear_poison().

Cc: <stable@vger.kernel.org>
Fixes: 82bf1037f2ca ("libnvdimm: check and clear poison before writing to pmem")
Cc: Dave Jiang <dave.jiang@intel.com>
[jeff: dynamically disable error clearing in the btt case]
Suggested-by: Jeff Moyer <jmoyer@redhat.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Reported-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/claim.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index b3323c0697f6..ca6d572c48fc 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -243,7 +243,15 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 	}
 
 	if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {
-		if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)) {
+		/*
+		 * FIXME: nsio_rw_bytes() may be called from atomic
+		 * context in the btt case and nvdimm_clear_poison()
+		 * takes a sleeping lock. Until the locking can be
+		 * reworked this capability requires that the namespace
+		 * is not claimed by btt.
+		 */
+		if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)
+				&& (!ndns->claim || !is_nd_btt(ndns->claim))) {
 			long cleared;
 
 			cleared = nvdimm_clear_poison(&ndns->dev, offset, size);

From 5376366886251e2f8f248704adb620a4bc4c0937 Mon Sep 17 00:00:00 2001
From: Cameron Gutman <aicommander@gmail.com>
Date: Mon, 10 Apr 2017 20:44:25 -0700
Subject: [PATCH 344/410] Input: xpad - add support for Razer Wildcat gamepad

Cc: stable@vger.kernel.org
Signed-off-by: Cameron Gutman <aicommander@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/joystick/xpad.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index 155fcb3b6230..153b1ee13e03 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -202,6 +202,7 @@ static const struct xpad_device {
 	{ 0x1430, 0x8888, "TX6500+ Dance Pad (first generation)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX },
 	{ 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 },
 	{ 0x1532, 0x0037, "Razer Sabertooth", 0, XTYPE_XBOX360 },
+	{ 0x1532, 0x0a03, "Razer Wildcat", 0, XTYPE_XBOXONE },
 	{ 0x15e4, 0x3f00, "Power A Mini Pro Elite", 0, XTYPE_XBOX360 },
 	{ 0x15e4, 0x3f0a, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 },
 	{ 0x15e4, 0x3f10, "Batarang Xbox 360 controller", 0, XTYPE_XBOX360 },
@@ -326,6 +327,7 @@ static struct usb_device_id xpad_table[] = {
 	XPAD_XBOX360_VENDOR(0x1430),		/* RedOctane X-Box 360 controllers */
 	XPAD_XBOX360_VENDOR(0x146b),		/* BigBen Interactive Controllers */
 	XPAD_XBOX360_VENDOR(0x1532),		/* Razer Sabertooth */
+	XPAD_XBOXONE_VENDOR(0x1532),		/* Razer Wildcat */
 	XPAD_XBOX360_VENDOR(0x15e4),		/* Numark X-Box 360 controllers */
 	XPAD_XBOX360_VENDOR(0x162e),		/* Joytech X-Box 360 controllers */
 	XPAD_XBOX360_VENDOR(0x1689),		/* Razer Onza */

From 350be257ea83029daee974c72b1fe2e6f1f8e615 Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilov@microsoft.com>
Date: Mon, 10 Apr 2017 10:31:33 -0700
Subject: [PATCH 345/410] CIFS: Fix null pointer deref during read resp
 processing

Currently during receiving a read response mid->resp_buf can be
NULL when it is being passed to cifs_discard_remaining_data() from
cifs_readv_discard(). Fix it by always passing server->smallbuf
instead and initializing mid->resp_buf at the end of read response
processing.

Signed-off-by: Pavel Shilovsky <pshilov@microsoft.com>
CC: Stable <stable@vger.kernel.org>
Acked-by: Sachin Prabhu <sprabhu@redhat.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsproto.h |  3 +--
 fs/cifs/cifssmb.c   | 15 ++++++++-------
 fs/cifs/smb2ops.c   |  4 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index ec5e5e514fdd..97e5d236d265 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -79,8 +79,7 @@ extern void cifs_delete_mid(struct mid_q_entry *mid);
 extern void cifs_wake_up_task(struct mid_q_entry *mid);
 extern int cifs_handle_standard(struct TCP_Server_Info *server,
 				struct mid_q_entry *mid);
-extern int cifs_discard_remaining_data(struct TCP_Server_Info *server,
-				       char *buf);
+extern int cifs_discard_remaining_data(struct TCP_Server_Info *server);
 extern int cifs_call_async(struct TCP_Server_Info *server,
 			struct smb_rqst *rqst,
 			mid_receive_t *receive, mid_callback_t *callback,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 967b92631807..5d21f00ae341 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1400,9 +1400,9 @@ openRetry:
  * current bigbuf.
  */
 int
-cifs_discard_remaining_data(struct TCP_Server_Info *server, char *buf)
+cifs_discard_remaining_data(struct TCP_Server_Info *server)
 {
-	unsigned int rfclen = get_rfc1002_length(buf);
+	unsigned int rfclen = get_rfc1002_length(server->smallbuf);
 	int remaining = rfclen + 4 - server->total_read;
 
 	while (remaining > 0) {
@@ -1426,8 +1426,10 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 	int length;
 	struct cifs_readdata *rdata = mid->callback_data;
 
-	length = cifs_discard_remaining_data(server, mid->resp_buf);
+	length = cifs_discard_remaining_data(server);
 	dequeue_mid(mid, rdata->result);
+	mid->resp_buf = server->smallbuf;
+	server->smallbuf = NULL;
 	return length;
 }
 
@@ -1459,7 +1461,7 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 
 	if (server->ops->is_status_pending &&
 	    server->ops->is_status_pending(buf, server, 0)) {
-		cifs_discard_remaining_data(server, buf);
+		cifs_discard_remaining_data(server);
 		return -1;
 	}
 
@@ -1519,9 +1521,6 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 	cifs_dbg(FYI, "0: iov_base=%p iov_len=%u\n",
 		 rdata->iov[0].iov_base, server->total_read);
 
-	mid->resp_buf = server->smallbuf;
-	server->smallbuf = NULL;
-
 	/* how much data is in the response? */
 	data_len = server->ops->read_data_length(buf);
 	if (data_offset + data_len > buflen) {
@@ -1544,6 +1543,8 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 		return cifs_readv_discard(server, mid);
 
 	dequeue_mid(mid, false);
+	mid->resp_buf = server->smallbuf;
+	server->smallbuf = NULL;
 	return length;
 }
 
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 7b12a727947e..152e37f2ad92 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2195,7 +2195,7 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
 	if (rc)
 		goto free_pages;
 
-	rc = cifs_discard_remaining_data(server, buf);
+	rc = cifs_discard_remaining_data(server);
 	if (rc)
 		goto free_pages;
 
@@ -2221,7 +2221,7 @@ free_pages:
 	kfree(pages);
 	return rc;
 discard_data:
-	cifs_discard_remaining_data(server, buf);
+	cifs_discard_remaining_data(server);
 	goto free_pages;
 }
 

From 40920c2bb119fd49ba03e2f97a172171781be442 Mon Sep 17 00:00:00 2001
From: Mark Syms <mark.syms@citrix.com>
Date: Tue, 29 Nov 2016 11:36:46 +0000
Subject: [PATCH 346/410] CIFS: handle guest access errors to Windows shares

Commit 1a967d6c9b39c226be1b45f13acd4d8a5ab3dc44 ("correctly to
anonymous authentication for the NTLM(v2) authentication") introduces
a regression in handling errors related to attempting a guest
connection to a Windows share which requires authentication. This
should result in a permission denied error but actually causes the
kernel module to enter a never-ending loop trying to follow a DFS
referal which doesn't exist.

The base cause of this is the failure now occurs later in the process
during tree connect and not at the session setup setup and all errors
in tree connect are interpreted as needing to follow the DFS paths
which isn't in this case correct. So, check the returned error against
EACCES and fail if this is returned error.

Feedback from Aurelien:

  PS> net user guest /activate:no
    PS> mkdir C:\guestshare
      PS> icacls C:\guestshare /grant 'Everyone:(OI)(CI)F'
        PS> new-smbshare -name guestshare -path C:\guestshare -fullaccess Everyone

        I've tested v3.10, v4.4, master, master+your patch using default options
        (empty or no user "NU") and user=abc (U).

        NT_LOGON_FAILURE in session setup: LF
        This is what you seem to have in 3.10.

        NT_ACCESS_DENIED in tree connect to the share: AD
        This is what you get before your infinite loop.

                     |   NU       U
                     --------------------------------
                     3.10         |   LF       LF
                     4.4          |   LF       LF
                     master       |   AD       LF
                     master+patch |   AD       LF

                     No infinite DFS loop :(
                     All these issues result in mount failing very fast with permission denied.

                     I guess it could be from either the Windows version or the share/folder
                     ACL. A deeper analysis of the packets might reveal more.

                     In any case I did not notice any issues for on a basic DFS setup with
                     the patch so I don't think it introduced any regressions, which is
                     probably all that matters. It still bothers me a little I couldn't hit
                     the bug.

                     I've included kernel output w/ debugging output and network capture of
                     my tests if anyone want to have a look at it. (master+patch = ml-guestfix).

Signed-off-by: Mark Syms <mark.syms@citrix.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Tested-by: Aurelien Aptel <aaptel@suse.com>
Acked-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/connect.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0c7596cef4b8..d82467cfb0e2 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3753,6 +3753,9 @@ try_mount_again:
 	if (IS_ERR(tcon)) {
 		rc = PTR_ERR(tcon);
 		tcon = NULL;
+		if (rc == -EACCES)
+			goto mount_fail_check;
+
 		goto remote_path_check;
 	}
 

From 18ea43113f5b74a97dd4be9bddbac10d68b1a6ce Mon Sep 17 00:00:00 2001
From: Germano Percossi <germano.percossi@citrix.com>
Date: Fri, 7 Apr 2017 12:29:36 +0100
Subject: [PATCH 347/410] CIFS: reconnect thread reschedule itself

In case of error, smb2_reconnect_server reschedule itself
with a delay, to avoid being too aggressive.

Signed-off-by: Germano Percossi <germano.percossi@citrix.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
CC: Stable <stable@vger.kernel.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2pdu.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 66fa1b941cdf..d09e98bb8584 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2181,6 +2181,9 @@ void smb2_reconnect_server(struct work_struct *work)
 	struct cifs_tcon *tcon, *tcon2;
 	struct list_head tmp_list;
 	int tcon_exist = false;
+	int rc;
+	int resched = false;
+
 
 	/* Prevent simultaneous reconnects that can corrupt tcon->rlist list */
 	mutex_lock(&server->reconnect_mutex);
@@ -2208,13 +2211,18 @@ void smb2_reconnect_server(struct work_struct *work)
 	spin_unlock(&cifs_tcp_ses_lock);
 
 	list_for_each_entry_safe(tcon, tcon2, &tmp_list, rlist) {
-		if (!smb2_reconnect(SMB2_INTERNAL_CMD, tcon))
+		rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon);
+		if (!rc)
 			cifs_reopen_persistent_handles(tcon);
+		else
+			resched = true;
 		list_del_init(&tcon->rlist);
 		cifs_put_tcon(tcon);
 	}
 
 	cifs_dbg(FYI, "Reconnecting tcons finished\n");
+	if (resched)
+		queue_delayed_work(cifsiod_wq, &server->reconnect, 2 * HZ);
 	mutex_unlock(&server->reconnect_mutex);
 
 	/* now we can safely release srv struct */

From a0918f1ce6a43ac980b42b300ec443c154970979 Mon Sep 17 00:00:00 2001
From: Germano Percossi <germano.percossi@citrix.com>
Date: Fri, 7 Apr 2017 12:29:37 +0100
Subject: [PATCH 348/410] CIFS: remove bad_network_name flag

STATUS_BAD_NETWORK_NAME can be received during node failover,
causing the flag to be set and making the reconnect thread
always unsuccessful, thereafter.

Once the only place where it is set is removed, the remaining
bits are rendered moot.

Removing it does not prevent "mount" from failing when a non
existent share is passed.

What happens when the share really ceases to exist while the
share is mounted is undefined now as much as it was before.

Signed-off-by: Germano Percossi <germano.percossi@citrix.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
CC: Stable <stable@vger.kernel.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/cifsglob.h | 1 -
 fs/cifs/smb2pdu.c  | 5 -----
 2 files changed, 6 deletions(-)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index d07f13a63369..37f5a41cc50c 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -948,7 +948,6 @@ struct cifs_tcon {
 	bool use_persistent:1; /* use persistent instead of durable handles */
 #ifdef CONFIG_CIFS_SMB2
 	bool print:1;		/* set if connection to printer share */
-	bool bad_network_name:1; /* set if ret status STATUS_BAD_NETWORK_NAME */
 	__le32 capabilities;
 	__u32 share_flags;
 	__u32 maximal_access;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index d09e98bb8584..1bd5d3033fc8 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1171,9 +1171,6 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
 	else
 		return -EIO;
 
-	if (tcon && tcon->bad_network_name)
-		return -ENOENT;
-
 	unc_path = kmalloc(MAX_SHARENAME_LENGTH * 2, GFP_KERNEL);
 	if (unc_path == NULL)
 		return -ENOMEM;
@@ -1277,8 +1274,6 @@ tcon_exit:
 tcon_error_exit:
 	if (rsp->hdr.sync_hdr.Status == STATUS_BAD_NETWORK_NAME) {
 		cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
-		if (tcon)
-			tcon->bad_network_name = true;
 	}
 	goto tcon_exit;
 }

From 1fa839b4986d648b907d117275869a0e46c324b9 Mon Sep 17 00:00:00 2001
From: Germano Percossi <germano.percossi@citrix.com>
Date: Fri, 7 Apr 2017 12:29:38 +0100
Subject: [PATCH 349/410] CIFS: store results of cifs_reopen_file to avoid
 infinite wait

This fixes Continuous Availability when errors during
file reopen are encountered.

cifs_user_readv and cifs_user_writev would wait for ever if
results of cifs_reopen_file are not stored and for later inspection.

In fact, results are checked and, in case of errors, a chain
of function calls leading to reads and writes to be scheduled in
a separate thread is skipped.
These threads will wake up the corresponding waiters once reads
and writes are done.

However, given the return value is not stored, when rc is checked
for errors a previous one (always zero) is inspected instead.
This leads to pending reads/writes added to the list, making
cifs_user_readv and cifs_user_writev wait for ever.

Signed-off-by: Germano Percossi <germano.percossi@citrix.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
CC: Stable <stable@vger.kernel.org>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/file.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index aa3debbba826..21d404535739 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2597,7 +2597,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
 		wdata->credits = credits;
 
 		if (!wdata->cfile->invalidHandle ||
-		    !cifs_reopen_file(wdata->cfile, false))
+		    !(rc = cifs_reopen_file(wdata->cfile, false)))
 			rc = server->ops->async_writev(wdata,
 					cifs_uncached_writedata_release);
 		if (rc) {
@@ -3022,7 +3022,7 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
 		rdata->credits = credits;
 
 		if (!rdata->cfile->invalidHandle ||
-		    !cifs_reopen_file(rdata->cfile, true))
+		    !(rc = cifs_reopen_file(rdata->cfile, true)))
 			rc = server->ops->async_readv(rdata);
 error:
 		if (rc) {
@@ -3617,7 +3617,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 		}
 
 		if (!rdata->cfile->invalidHandle ||
-		    !cifs_reopen_file(rdata->cfile, true))
+		    !(rc = cifs_reopen_file(rdata->cfile, true)))
 			rc = server->ops->async_readv(rdata);
 		if (rc) {
 			add_credits_and_wake_if(server, rdata->credits, 0);

From 1c99a6874133ebf4513504ef7c32ce1f532d323f Mon Sep 17 00:00:00 2001
From: Markus Trippelsdorf <markus@trippelsdorf.de>
Date: Fri, 7 Apr 2017 14:09:04 +0200
Subject: [PATCH 350/410] x86/debug: Fix the printk() debug output of
 signal_fault(), do_trap() and do_general_protection()

Since commit:

  4bcc595ccd80 "printk: reinstate KERN_CONT for printing"

... the debug output of signal_fault(), do_trap() and do_general_protection()
looks garbled, e.g.:

 traps: conftest[9335] trap invalid opcode ip:400428 sp:7ffeaba1b0d8 error:0
  in conftest[400000+1000]

(note the unintended line break.)

Fix the bug by adding KERN_CONTs.

Signed-off-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/signal.c | 2 +-
 arch/x86/kernel/traps.c  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 396c042e9d0e..cc30a74e4adb 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -846,7 +846,7 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
 		       task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
 		       me->comm, me->pid, where, frame,
 		       regs->ip, regs->sp, regs->orig_ax);
-		print_vma_addr(" in ", regs->ip);
+		print_vma_addr(KERN_CONT " in ", regs->ip);
 		pr_cont("\n");
 	}
 
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 948443e115c1..4e496379a871 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -255,7 +255,7 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
 		pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx",
 			tsk->comm, tsk->pid, str,
 			regs->ip, regs->sp, error_code);
-		print_vma_addr(" in ", regs->ip);
+		print_vma_addr(KERN_CONT " in ", regs->ip);
 		pr_cont("\n");
 	}
 
@@ -519,7 +519,7 @@ do_general_protection(struct pt_regs *regs, long error_code)
 		pr_info("%s[%d] general protection ip:%lx sp:%lx error:%lx",
 			tsk->comm, task_pid_nr(tsk),
 			regs->ip, regs->sp, error_code);
-		print_vma_addr(" in ", regs->ip);
+		print_vma_addr(KERN_CONT " in ", regs->ip);
 		pr_cont("\n");
 	}
 

From 7f00f388712b29005782bad7e4b25942620f3b9c Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 11 Apr 2017 09:14:46 +0200
Subject: [PATCH 351/410] x86/intel_rdt: Fix locking in
 rdtgroup_schemata_write()

The schemata lock is released before freeing the resource's temporary
tmp_cbms allocation. That's racy versus another write which allocates and
uses new temporary storage, resulting in memory leaks, freeing in use
memory, double a free or any combination of those.

Move the unlock after the release code.

Fixes: 60ec2440c63d ("x86/intel_rdt: Add schemata file")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Shaohua Li <shli@fb.com>
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20170411071446.15241-1-jolsa@kernel.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/cpu/intel_rdt_schemata.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/intel_rdt_schemata.c b/arch/x86/kernel/cpu/intel_rdt_schemata.c
index f369cb8db0d5..badd2b31a560 100644
--- a/arch/x86/kernel/cpu/intel_rdt_schemata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_schemata.c
@@ -200,11 +200,11 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 	}
 
 out:
-	rdtgroup_kn_unlock(of->kn);
 	for_each_enabled_rdt_resource(r) {
 		kfree(r->tmp_cbms);
 		r->tmp_cbms = NULL;
 	}
+	rdtgroup_kn_unlock(of->kn);
 	return ret ?: nbytes;
 }
 

From 7036502783729c2aaf7a3c24c89087c58721430f Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Mon, 10 Apr 2017 13:16:33 +0300
Subject: [PATCH 352/410] pinctrl: cherryview: Add a quirk to make Acer
 Chromebook keyboard work again

After commit 47c950d10202 ("pinctrl: cherryview: Do not add all
southwest and north GPIOs to IRQ domain") the driver does not add all
GPIOs to the irqdomain. The reason for that is that those GPIOs cannot
generate IRQs at all, only GPEs (General Purpose Events). This causes
Linux virtual IRQ numbering to change.

However, it seems some CYAN Chromebooks, including Acer Chromebook
hardcodes these Linux IRQ numbers in the ACPI tables of the machine.
Since the numbering is different now, the IRQ meant for keyboard does
not match the Linux virtual IRQ number anymore making the keyboard
non-functional.

Work this around by adding special quirk just for these machines where
we add back all GPIOs to the irqdomain. Rest of the Cherryview/Braswell
based machines will not be affected by the change.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=194945
Fixes: 47c950d10202 ("pinctrl: cherryview: Do not add all southwest and north GPIOs to IRQ domain")
Reported-by: Adam S Levy <theadamlevy@gmail.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/intel/pinctrl-cherryview.c | 26 ++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index f80134e3e0b6..9ff790174906 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -13,6 +13,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/dmi.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -1524,10 +1525,31 @@ static void chv_gpio_irq_handler(struct irq_desc *desc)
 	chained_irq_exit(chip, desc);
 }
 
+/*
+ * Certain machines seem to hardcode Linux IRQ numbers in their ACPI
+ * tables. Since we leave GPIOs that are not capable of generating
+ * interrupts out of the irqdomain the numbering will be different and
+ * cause devices using the hardcoded IRQ numbers fail. In order not to
+ * break such machines we will only mask pins from irqdomain if the machine
+ * is not listed below.
+ */
+static const struct dmi_system_id chv_no_valid_mask[] = {
+	{
+		/* See https://bugzilla.kernel.org/show_bug.cgi?id=194945 */
+		.ident = "Acer Chromebook (CYAN)",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Edgar"),
+			DMI_MATCH(DMI_BIOS_DATE, "05/21/2016"),
+		},
+	}
+};
+
 static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
 {
 	const struct chv_gpio_pinrange *range;
 	struct gpio_chip *chip = &pctrl->chip;
+	bool need_valid_mask = !dmi_check_system(chv_no_valid_mask);
 	int ret, i, offset;
 
 	*chip = chv_gpio_chip;
@@ -1536,7 +1558,7 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
 	chip->label = dev_name(pctrl->dev);
 	chip->parent = pctrl->dev;
 	chip->base = -1;
-	chip->irq_need_valid_mask = true;
+	chip->irq_need_valid_mask = need_valid_mask;
 
 	ret = devm_gpiochip_add_data(pctrl->dev, chip, pctrl);
 	if (ret) {
@@ -1567,7 +1589,7 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
 		intsel &= CHV_PADCTRL0_INTSEL_MASK;
 		intsel >>= CHV_PADCTRL0_INTSEL_SHIFT;
 
-		if (intsel >= pctrl->community->nirqs)
+		if (need_valid_mask && intsel >= pctrl->community->nirqs)
 			clear_bit(i, chip->irq_valid_mask);
 	}
 

From 84379d83d8e536aef2c706744ff22c136d0be6c9 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Tue, 11 Apr 2017 11:10:16 +0200
Subject: [PATCH 353/410] Revert "HID: rmi: Handle all Synaptics touchpads
 using hid-rmi"

This reverts commit 279967a65b320d174a507498aea7d44db3fee7f4.

Multiple regressions [1] [2] [3] have been reported. The hid-rmi
support would have to fixed and redone in 4.11+.

[1] http://lkml.kernel.org/r/b79b88c8-770a-13f6-5668-c3a94254e5e0@gmail.com
[2] http://lkml.kernel.org/r/375e67b5-2cb8-3491-1d71-d8650d6e9451@gmail.com
[3] https://bugzilla.kernel.org/show_bug.cgi?id=195287

Reported-by: Cameron Gutman <aicommander@gmail.com>
Reported-by: Gabriele Mazzotta <gabriele.mzt@gmail.com>
Reported-by: Lorenzo J. Lucchini <ljlbox@tiscali.it>
Reported-by: Thorsten Leemhuis <linux@leemhuis.info>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 86bbd8a1fd4a..d162f0dc76e3 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -819,8 +819,7 @@ static int hid_scan_report(struct hid_device *hid)
 		hid->group = HID_GROUP_WACOM;
 		break;
 	case USB_VENDOR_ID_SYNAPTICS:
-		if (hid->group == HID_GROUP_GENERIC ||
-		    hid->group == HID_GROUP_MULTITOUCH_WIN_8)
+		if (hid->group == HID_GROUP_GENERIC)
 			if ((parser->scan_flags & HID_SCAN_FLAG_VENDOR_SPECIFIC)
 			    && (parser->scan_flags & HID_SCAN_FLAG_GD_POINTER))
 				/*

From 63987bfebd8869e00b34e2bdd12e59d71909bec0 Mon Sep 17 00:00:00 2001
From: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Date: Wed, 5 Apr 2017 15:51:50 +0530
Subject: [PATCH 354/410] drm/i915: Suspend GuC prior to GPU Reset during GEM
 suspend

i915 is currently doing a full GPU reset at the end of
i915_gem_suspend() followed by GuC suspend in i915_drm_suspend(). This
GPU reset clobbers the GuC, causing the suspend request to then fail,
leaving the GuC in an undefined state. We need to tell the GuC to
suspend before we do the direct intel_gpu_reset().

v2: Commit message update. (Chris, Daniele)

Fixes: 1c777c5d1dcd ("drm/i915/hsw: Fix GPU hang during resume from S3-devices state")
Cc: Jeff McGee <jeff.mcgee@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1491387710-20553-1-git-send-email-sagar.a.kamble@intel.com
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
(cherry picked from commit fd08923384385400101c71ac0d21d37d6b23b00d)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c | 2 --
 drivers/gpu/drm/i915/i915_gem.c | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 1c75402a59c1..5c089b3c2a7e 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1434,8 +1434,6 @@ static int i915_drm_suspend(struct drm_device *dev)
 		goto out;
 	}
 
-	intel_guc_suspend(dev_priv);
-
 	intel_display_suspend(dev);
 
 	intel_dp_mst_suspend(dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 67b1fc5a0331..fe531f904062 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4348,6 +4348,8 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
 	i915_gem_context_lost(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
 
+	intel_guc_suspend(dev_priv);
+
 	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
 	cancel_delayed_work_sync(&dev_priv->gt.retire_work);
 

From c053b5a506d3afc038c485a86e3d461f6c7fb207 Mon Sep 17 00:00:00 2001
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Date: Fri, 7 Apr 2017 13:49:34 +0300
Subject: [PATCH 355/410] drm/i915: Don't call synchronize_rcu_expedited under
 struct_mutex

Only call synchronize_rcu_expedited after unlocking struct_mutex to
avoid deadlock because the workqueues depend on struct_mutex.

>From original patch by Andrea:

synchronize_rcu/synchronize_sched/synchronize_rcu_expedited() will
hang until its own workqueues are run. The i915 gem workqueues will
wait on the struct_mutex to be released. So we cannot wait for a
quiescent state using those rcu primitives while holding the
struct_mutex or it creates a circular lock dependency resulting in
kernel hangs (which is reproducible but goes undetected by lockdep).

kswapd0         D    0   700      2 0x00000000
Call Trace:
? __schedule+0x1a5/0x660
? schedule+0x36/0x80
? _synchronize_rcu_expedited.constprop.65+0x2ef/0x300
? wake_up_bit+0x20/0x20
? rcu_stall_kick_kthreads.part.54+0xc0/0xc0
? rcu_exp_wait_wake+0x530/0x530
? i915_gem_shrink+0x34b/0x4b0
? i915_gem_shrinker_scan+0x7c/0x90
? i915_gem_shrinker_scan+0x7c/0x90
? shrink_slab.part.61.constprop.72+0x1c1/0x3a0
? shrink_zone+0x154/0x160
? kswapd+0x40a/0x720
? kthread+0xf4/0x130
? try_to_free_pages+0x450/0x450
? kthread_create_on_node+0x40/0x40
? ret_from_fork+0x23/0x30
plasmashell     D    0  4657   4614 0x00000000
Call Trace:
? __schedule+0x1a5/0x660
? schedule+0x36/0x80
? schedule_preempt_disabled+0xe/0x10
? __mutex_lock.isra.4+0x1c9/0x790
? i915_gem_close_object+0x26/0xc0
? i915_gem_close_object+0x26/0xc0
? drm_gem_object_release_handle+0x48/0x90
? drm_gem_handle_delete+0x50/0x80
? drm_ioctl+0x1fa/0x420
? drm_gem_handle_create+0x40/0x40
? pipe_write+0x391/0x410
? __vfs_write+0xc6/0x120
? do_vfs_ioctl+0x8b/0x5d0
? SyS_ioctl+0x3b/0x70
? entry_SYSCALL_64_fastpath+0x13/0x94
kworker/0:0     D    0 29186      2 0x00000000
Workqueue: events __i915_gem_free_work
Call Trace:
? __schedule+0x1a5/0x660
? schedule+0x36/0x80
? schedule_preempt_disabled+0xe/0x10
? __mutex_lock.isra.4+0x1c9/0x790
? del_timer_sync+0x44/0x50
? update_curr+0x57/0x110
? __i915_gem_free_objects+0x31/0x300
? __i915_gem_free_objects+0x31/0x300
? __i915_gem_free_work+0x2d/0x40
? process_one_work+0x13a/0x3b0
? worker_thread+0x4a/0x460
? kthread+0xf4/0x130
? process_one_work+0x3b0/0x3b0
? kthread_create_on_node+0x40/0x40
? ret_from_fork+0x23/0x30

Fixes: 3d3d18f086cd ("drm/i915: Avoid rcu_barrier() from reclaim paths (shrinker)")
Reported-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
(cherry picked from commit 8f612d055183545070ca1009ac2eb1f2e044cc20)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 26 +++++++++++++++---------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index d5d2b4c6ed38..70b3832a79dd 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -53,6 +53,17 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
 	BUG();
 }
 
+static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock)
+{
+	if (!unlock)
+		return;
+
+	mutex_unlock(&dev->struct_mutex);
+
+	/* expedite the RCU grace period to free some request slabs */
+	synchronize_rcu_expedited();
+}
+
 static bool any_vma_pinned(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
@@ -232,11 +243,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 		intel_runtime_pm_put(dev_priv);
 
 	i915_gem_retire_requests(dev_priv);
-	if (unlock)
-		mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	/* expedite the RCU grace period to free some request slabs */
-	synchronize_rcu_expedited();
+	i915_gem_shrinker_unlock(&dev_priv->drm, unlock);
 
 	return count;
 }
@@ -293,8 +301,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 			count += obj->base.size >> PAGE_SHIFT;
 	}
 
-	if (unlock)
-		mutex_unlock(&dev->struct_mutex);
+	i915_gem_shrinker_unlock(dev, unlock);
 
 	return count;
 }
@@ -321,8 +328,8 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 					 sc->nr_to_scan - freed,
 					 I915_SHRINK_BOUND |
 					 I915_SHRINK_UNBOUND);
-	if (unlock)
-		mutex_unlock(&dev->struct_mutex);
+
+	i915_gem_shrinker_unlock(dev, unlock);
 
 	return freed;
 }
@@ -364,8 +371,7 @@ i915_gem_shrinker_unlock_uninterruptible(struct drm_i915_private *dev_priv,
 					 struct shrinker_lock_uninterruptible *slu)
 {
 	dev_priv->mm.interruptible = slu->was_interruptible;
-	if (slu->unlock)
-		mutex_unlock(&dev_priv->drm.struct_mutex);
+	i915_gem_shrinker_unlock(&dev_priv->drm, slu->unlock);
 }
 
 static int

From 0c45b36f8acc89cb94c8696a27574f01e0e411dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Fri, 7 Apr 2017 22:02:29 +0200
Subject: [PATCH 356/410] drm/udl: Fix unaligned memory access in
 udl_render_hline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On SPARC, the udl driver filled my kernel log with these messages:

[186668.910612] Kernel unaligned access at TPC[76609c] udl_render_hline+0x13c/0x3a0

Use put_unaligned_be16 to avoid them. On x86 this results in the same
code, but on SPARC the compiler emits two single-byte stores.

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Acked-by: David Airlie <airlied@linux.ie>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Link: http://patchwork.freedesktop.org/patch/msgid/20170407200229.20642-1-j.neuschaefer@gmx.net
---
 drivers/gpu/drm/udl/udl_transfer.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c
index 917dcb978c2c..0c87b1ac6b68 100644
--- a/drivers/gpu/drm/udl/udl_transfer.c
+++ b/drivers/gpu/drm/udl/udl_transfer.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/fb.h>
 #include <linux/prefetch.h>
+#include <asm/unaligned.h>
 
 #include <drm/drmP.h>
 #include "udl_drv.h"
@@ -163,7 +164,7 @@ static void udl_compress_hline16(
 			const u8 *const start = pixel;
 			const uint16_t repeating_pixel_val16 = pixel_val16;
 
-			*(uint16_t *)cmd = cpu_to_be16(pixel_val16);
+			put_unaligned_be16(pixel_val16, cmd);
 
 			cmd += 2;
 			pixel += bpp;

From 96a94cc5158859943b7e4e72ae69e572815f5413 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 Apr 2017 12:10:58 +0200
Subject: [PATCH 357/410] bpf: reference may_access_skb() from __bpf_prog_run()

It took me quite some time to figure out how this was linked,
so in order to save the next person the effort of finding it
add a comment in __bpf_prog_run() that indicates what exactly
determines that a program can access the ctx == skb.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/core.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f45827e205d3..b4f1cb0c5ac7 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1162,12 +1162,12 @@ out:
 	LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
 		off = IMM;
 load_word:
-		/* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are
-		 * only appearing in the programs where ctx ==
-		 * skb. All programs keep 'ctx' in regs[BPF_REG_CTX]
-		 * == BPF_R6, bpf_convert_filter() saves it in BPF_R6,
-		 * internal BPF verifier will check that BPF_R6 ==
-		 * ctx.
+		/* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
+		 * appearing in the programs where ctx == skb
+		 * (see may_access_skb() in the verifier). All programs
+		 * keep 'ctx' in regs[BPF_REG_CTX] == BPF_R6,
+		 * bpf_convert_filter() saves it in BPF_R6, internal BPF
+		 * verifier will check that BPF_R6 == ctx.
 		 *
 		 * BPF_ABS and BPF_IND are wrappers of function calls,
 		 * so they scratch BPF_R1-BPF_R5 registers, preserve

From 951e7966398b0fd6bacebec2d87ffd61c3f68b18 Mon Sep 17 00:00:00 2001
From: Adam Borowski <kilobyte@angband.pl>
Date: Fri, 31 Mar 2017 17:19:04 +0200
Subject: [PATCH 358/410] btrfs: drop the nossd flag when remounting with -o
 ssd

The opposite case was already handled right in the very next switch entry.
And also when turning on nossd, drop ssd_spread.

Reported-by: Hans van Kranenburg <hans.van.kranenburg@mendix.com>
Signed-off-by: Adam Borowski <kilobyte@angband.pl>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index da687dc79cce..9530a333d302 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -549,16 +549,19 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 		case Opt_ssd:
 			btrfs_set_and_info(info, SSD,
 					   "use ssd allocation scheme");
+			btrfs_clear_opt(info->mount_opt, NOSSD);
 			break;
 		case Opt_ssd_spread:
 			btrfs_set_and_info(info, SSD_SPREAD,
 					   "use spread ssd allocation scheme");
 			btrfs_set_opt(info->mount_opt, SSD);
+			btrfs_clear_opt(info->mount_opt, NOSSD);
 			break;
 		case Opt_nossd:
 			btrfs_set_and_info(info, NOSSD,
 					     "not using ssd allocation scheme");
 			btrfs_clear_opt(info->mount_opt, SSD);
+			btrfs_clear_opt(info->mount_opt, SSD_SPREAD);
 			break;
 		case Opt_barrier:
 			btrfs_clear_and_info(info, NOBARRIER,

From 2e949b0a5592664f8b3eb3e2e48213f514892561 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Wed, 5 Apr 2017 14:04:19 -0700
Subject: [PATCH 359/410] Btrfs: fix invalid dereference in btrfs_retry_endio

When doing directIO repair, we have this oops:

[ 1458.532816] general protection fault: 0000 [#1] SMP
...
[ 1458.536291] Workqueue: btrfs-endio-repair btrfs_endio_repair_helper [btrfs]
[ 1458.536893] task: ffff88082a42d100 task.stack: ffffc90002b3c000
[ 1458.537499] RIP: 0010:btrfs_retry_endio+0x7e/0x1a0 [btrfs]
...
[ 1458.543261] Call Trace:
[ 1458.543958]  ? rcu_read_lock_sched_held+0xc4/0xd0
[ 1458.544374]  bio_endio+0xed/0x100
[ 1458.544750]  end_workqueue_fn+0x3c/0x40 [btrfs]
[ 1458.545257]  normal_work_helper+0x9f/0x900 [btrfs]
[ 1458.545762]  btrfs_endio_repair_helper+0x12/0x20 [btrfs]
[ 1458.546224]  process_one_work+0x34d/0xb70
[ 1458.546570]  ? process_one_work+0x29e/0xb70
[ 1458.546938]  worker_thread+0x1cf/0x960
[ 1458.547263]  ? process_one_work+0xb70/0xb70
[ 1458.547624]  kthread+0x17d/0x180
[ 1458.547909]  ? kthread_create_on_node+0x70/0x70
[ 1458.548300]  ret_from_fork+0x31/0x40

It turns out that btrfs_retry_endio is trying to get inode from a directIO
page.

This fixes the problem by using the saved inode pointer, done->inode.
btrfs_retry_endio_nocsum has the same problem, and it's fixed as well.

Also cleanup unused @start (which is too trivial for a separate patch).

Cc: David Sterba <dsterba@suse.cz>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 876f1d36030c..388c6ce069de 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7910,7 +7910,6 @@ struct btrfs_retry_complete {
 static void btrfs_retry_endio_nocsum(struct bio *bio)
 {
 	struct btrfs_retry_complete *done = bio->bi_private;
-	struct inode *inode;
 	struct bio_vec *bvec;
 	int i;
 
@@ -7918,12 +7917,12 @@ static void btrfs_retry_endio_nocsum(struct bio *bio)
 		goto end;
 
 	ASSERT(bio->bi_vcnt == 1);
-	inode = bio->bi_io_vec->bv_page->mapping->host;
-	ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(inode));
+	ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(done->inode));
 
 	done->uptodate = 1;
 	bio_for_each_segment_all(bvec, bio, i)
-	clean_io_failure(BTRFS_I(done->inode), done->start, bvec->bv_page, 0);
+		clean_io_failure(BTRFS_I(done->inode), done->start,
+				 bvec->bv_page, 0);
 end:
 	complete(&done->done);
 	bio_put(bio);
@@ -7986,9 +7985,7 @@ static void btrfs_retry_endio(struct bio *bio)
 {
 	struct btrfs_retry_complete *done = bio->bi_private;
 	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
-	struct inode *inode;
 	struct bio_vec *bvec;
-	u64 start;
 	int uptodate;
 	int ret;
 	int i;
@@ -7998,11 +7995,8 @@ static void btrfs_retry_endio(struct bio *bio)
 
 	uptodate = 1;
 
-	start = done->start;
-
 	ASSERT(bio->bi_vcnt == 1);
-	inode = bio->bi_io_vec->bv_page->mapping->host;
-	ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(inode));
+	ASSERT(bio->bi_io_vec->bv_len == btrfs_inode_sectorsize(done->inode));
 
 	bio_for_each_segment_all(bvec, bio, i) {
 		ret = __readpage_endio_check(done->inode, io_bio, i,

From 97bf5a5589aa3a59c60aa775fc12ec0483fc5002 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Fri, 7 Apr 2017 13:11:10 -0700
Subject: [PATCH 360/410] Btrfs: fix segmentation fault when doing dio read

Commit 2dabb3248453 ("Btrfs: Direct I/O read: Work on sectorsized blocks")
introduced this bug during iterating bio pages in dio read's endio hook,
and it could end up with segment fault of the dio reading task.

So the reason is 'if (nr_sectors--)', and it makes the code assume that
there is one more block in the same page, so page offset is increased and
the bio which is created to repair the bad block then has an incorrect
bvec.bv_offset, and a later access of the page content would throw a
segmentation fault.

This also adds ASSERT to check page offset against page size.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 388c6ce069de..55ed2c4829a8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7972,8 +7972,10 @@ next_block_or_try_again:
 
 		start += sectorsize;
 
-		if (nr_sectors--) {
+		nr_sectors--;
+		if (nr_sectors) {
 			pgoff += sectorsize;
+			ASSERT(pgoff < PAGE_SIZE);
 			goto next_block_or_try_again;
 		}
 	}
@@ -8074,8 +8076,10 @@ next:
 
 		ASSERT(nr_sectors);
 
-		if (--nr_sectors) {
+		nr_sectors--;
+		if (nr_sectors) {
 			pgoff += sectorsize;
+			ASSERT(pgoff < PAGE_SIZE);
 			goto next_block;
 		}
 	}

From a967efb30b3afa3d858edd6a17f544f9e9e46eea Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Mon, 10 Apr 2017 12:36:26 -0700
Subject: [PATCH 361/410] Btrfs: fix potential use-after-free for cloned bio

KASAN reports that there is a use-after-free case of bio in btrfs_map_bio.

If we need to submit IOs to several disks at a time, the original bio
would get cloned and mapped to the destination disk, but we really should
use the original bio instead of a cloned bio to do the sanity check
because cloned bios are likely to be freed by its endio.

Reported-by: Diego <diegocg@gmail.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7c8c7bbee197..7c7e0c99360f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6213,7 +6213,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
 	for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
 		dev = bbio->stripes[dev_nr].dev;
 		if (!dev || !dev->bdev ||
-		    (bio_op(bio) == REQ_OP_WRITE && !dev->writeable)) {
+		    (bio_op(first_bio) == REQ_OP_WRITE && !dev->writeable)) {
 			bbio_error(bbio, first_bio, logical);
 			continue;
 		}

From 911e572e98656056ebbbb4274d8fe61434188646 Mon Sep 17 00:00:00 2001
From: "Guilherme G. Piccoli" <gpiccoli@linux.vnet.ibm.com>
Date: Thu, 6 Apr 2017 18:12:09 -0300
Subject: [PATCH 362/410] scsi: aacraid: fix PCI error recovery path

During a PCI error recovery, if aac_check_health() is not aware that a
PCI error happened and we have an offline PCI channel, it might trigger
some errors (like NULL pointer dereference) and inhibit the error
recovery process to complete.

This patch makes the health check procedure aware of PCI channel issues,
and in case of error recovery process, the function
aac_adapter_check_health() returns -1 and let the recovery process to
complete successfully. This patch was tested on upstream kernel
v4.11-rc5 in PowerPC ppc64le architecture with adapter 9005:028d
(VID:DID) - the error recovery procedure was able to recover fine.

Fixes: 5c63f7f710bd ("aacraid: Added EEH support")
Cc: stable@vger.kernel.org # v4.6+
Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
Reviewed-by: Dave Carroll <david.carroll@microsemi.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/aacraid/aacraid.h | 11 ++++++++---
 drivers/scsi/aacraid/commsup.c |  3 ++-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index d036a806f31c..d281492009fb 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -1690,9 +1690,6 @@ struct aac_dev
 #define aac_adapter_sync_cmd(dev, command, p1, p2, p3, p4, p5, p6, status, r1, r2, r3, r4) \
 	(dev)->a_ops.adapter_sync_cmd(dev, command, p1, p2, p3, p4, p5, p6, status, r1, r2, r3, r4)
 
-#define aac_adapter_check_health(dev) \
-	(dev)->a_ops.adapter_check_health(dev)
-
 #define aac_adapter_restart(dev, bled, reset_type) \
 	((dev)->a_ops.adapter_restart(dev, bled, reset_type))
 
@@ -2615,6 +2612,14 @@ static inline unsigned int cap_to_cyls(sector_t capacity, unsigned divisor)
 	return capacity;
 }
 
+static inline int aac_adapter_check_health(struct aac_dev *dev)
+{
+	if (unlikely(pci_channel_offline(dev->pdev)))
+		return -1;
+
+	return (dev)->a_ops.adapter_check_health(dev);
+}
+
 /* SCp.phase values */
 #define AAC_OWNER_MIDLEVEL	0x101
 #define AAC_OWNER_LOWLEVEL	0x102
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index c8172f16cf33..1f4918355fdb 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -1873,7 +1873,8 @@ int aac_check_health(struct aac_dev * aac)
 	spin_unlock_irqrestore(&aac->fib_lock, flagv);
 
 	if (BlinkLED < 0) {
-		printk(KERN_ERR "%s: Host adapter dead %d\n", aac->name, BlinkLED);
+		printk(KERN_ERR "%s: Host adapter is dead (or got a PCI error) %d\n",
+				aac->name, BlinkLED);
 		goto out;
 	}
 

From 785a470496d8e0a32e3d39f376984eb2c98ca5b3 Mon Sep 17 00:00:00 2001
From: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Date: Tue, 11 Apr 2017 11:46:04 -0300
Subject: [PATCH 363/410] scsi: ipr: do not set DID_PASSTHROUGH on CHECK
 CONDITION

On a dual controller setup with multipath enabled, some MEDIUM ERRORs
caused both paths to be failed, thus I/O got queued/blocked since the
'queue_if_no_path' feature is enabled by default on IPR controllers.

This example disabled 'queue_if_no_path' so the I/O failure is seen at
the sg_dd program.  Notice that after the sg_dd test-case, both paths
are in 'failed' state, and both path/priority groups are in 'enabled'
state (not 'active') -- which would block I/O with 'queue_if_no_path'.

    # sg_dd if=/dev/dm-2 bs=4096 count=1 dio=1 verbose=4 blk_sgio=0
    <...>
    read(unix): count=4096, res=-1
    sg_dd: reading, skip=0 : Input/output error
    <...>

    # dmesg
    [...] sd 2:2:16:0: [sds] FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE
    [...] sd 2:2:16:0: [sds] Sense Key : Medium Error [current]
    [...] sd 2:2:16:0: [sds] Add. Sense: Unrecovered read error - recommend rewrite the data
    [...] sd 2:2:16:0: [sds] CDB: Read(10) 28 00 00 00 00 00 00 00 20 00
    [...] blk_update_request: I/O error, dev sds, sector 0
    [...] device-mapper: multipath: Failing path 65:32.
    <...>
    [...] device-mapper: multipath: Failing path 65:224.

    # multipath -l
    1IBM_IPR-0_59C2AE0000001F80 dm-2 IBM     ,IPR-0   59C2AE00
    size=5.2T features='0' hwhandler='1 alua' wp=rw
    |-+- policy='service-time 0' prio=0 status=enabled
    | `- 2:2:16:0 sds  65:32  failed undef running
    `-+- policy='service-time 0' prio=0 status=enabled
      `- 1:2:7:0  sdae 65:224 failed undef running

This is not the desired behavior. The dm-multipath explicitly checks
for the MEDIUM ERROR case (and a few others) so not to fail the path
(e.g., I/O to other sectors could potentially happen without problems).
See dm-mpath.c :: do_end_io_bio() -> noretry_error() !->! fail_path().

The problem trace is:

1) ipr_scsi_done()  // SENSE KEY/CHECK CONDITION detected, go to..
2) ipr_erp_start()  // ipr_is_gscsi() and masked_ioasc OK, go to..
3) ipr_gen_sense()  // masked_ioasc is IPR_IOASC_MED_DO_NOT_REALLOC,
                    // so set DID_PASSTHROUGH.

4) scsi_decide_disposition()  // check for DID_PASSTHROUGH and return
                              // early on, faking a DID_OK.. *instead*
                              // of reaching scsi_check_sense().

                              // Had it reached the latter, that would
                              // set host_byte to DID_MEDIUM_ERROR.

5) scsi_finish_command()
6) scsi_io_completion()
7) __scsi_error_from_host_byte()  // That would be converted to -ENODATA
<...>
8) dm_softirq_done()
9) multipath_end_io()
10) do_end_io()
11) noretry_error()  // And that is checked in dm-mpath :: noretry_error()
                     // which would cause fail_path() not to be called.

With this patch applied, the I/O is failed but the paths are not.  This
multipath device continues accepting more I/O requests without blocking.
(and notice the different host byte/driver byte handling per SCSI layer).

    # dmesg
    [...] sd 2:2:7:0: [sdaf] Done: SUCCESS Result: hostbyte=0x13 driverbyte=DRIVER_OK
    [...] sd 2:2:7:0: [sdaf] CDB: Read(10) 28 00 00 00 00 00 00 00 40 00
    [...] sd 2:2:7:0: [sdaf] Sense Key : Medium Error [current]
    [...] sd 2:2:7:0: [sdaf] Add. Sense: Unrecovered read error - recommend rewrite the data
    [...] blk_update_request: critical medium error, dev sdaf, sector 0
    [...] blk_update_request: critical medium error, dev dm-6, sector 0
    [...] sd 2:2:7:0: [sdaf] Done: SUCCESS Result: hostbyte=0x13 driverbyte=DRIVER_OK
    [...] sd 2:2:7:0: [sdaf] CDB: Read(10) 28 00 00 00 00 00 00 00 10 00
    [...] sd 2:2:7:0: [sdaf] Sense Key : Medium Error [current]
    [...] sd 2:2:7:0: [sdaf] Add. Sense: Unrecovered read error - recommend rewrite the data
    [...] blk_update_request: critical medium error, dev sdaf, sector 0
    [...] blk_update_request: critical medium error, dev dm-6, sector 0
    [...] Buffer I/O error on dev dm-6, logical block 0, async page read

    # multipath -l 1IBM_IPR-0_59C2AE0000001F80
    1IBM_IPR-0_59C2AE0000001F80 dm-6 IBM     ,IPR-0   59C2AE00
    size=5.2T features='1 queue_if_no_path' hwhandler='1 alua' wp=rw
    |-+- policy='service-time 0' prio=0 status=active
    | `- 2:2:7:0  sdaf 65:240 active undef running
    `-+- policy='service-time 0' prio=0 status=enabled
      `- 1:2:7:0  sdh  8:112  active undef running

Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Acked-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ipr.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 835c59c777f2..0d782135ac55 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -6293,7 +6293,12 @@ static void ipr_erp_start(struct ipr_ioa_cfg *ioa_cfg,
 		break;
 	case IPR_IOASC_MED_DO_NOT_REALLOC: /* prevent retries */
 	case IPR_IOASA_IR_DUAL_IOA_DISABLED:
-		scsi_cmd->result |= (DID_PASSTHROUGH << 16);
+		/*
+		 * exception: do not set DID_PASSTHROUGH on CHECK CONDITION
+		 * so SCSI mid-layer and upper layers handle it accordingly.
+		 */
+		if (scsi_cmd->result != SAM_STAT_CHECK_CONDITION)
+			scsi_cmd->result |= (DID_PASSTHROUGH << 16);
 		break;
 	case IPR_IOASC_BUS_WAS_RESET:
 	case IPR_IOASC_BUS_WAS_RESET_BY_OTHER:

From b6fe0440c63716e09cfc0d1484e3898a0f29d1d1 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 10 Apr 2017 14:59:27 +0300
Subject: [PATCH 364/410] bridge: implement missing ndo_uninit()

While the bridge driver implements an ndo_init(), it was missing a
symmetric ndo_uninit(), causing the different de-initialization
operations to be scattered around its dellink() and destructor().

Implement a symmetric ndo_uninit() and remove the overlapping operations
from its dellink() and destructor().

This is a prerequisite for the next patch, as it allows us to have a
proper cleanup upon changelink() failure during the bridge's newlink().

Fixes: b6677449dff6 ("bridge: netlink: call br_changelink() during br_dev_newlink()")
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c    | 20 +++++++++++---------
 net/bridge/br_if.c        |  1 -
 net/bridge/br_multicast.c |  7 +++++--
 net/bridge/br_private.h   |  5 +++++
 4 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index ea71513fca21..90f49a194249 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -119,6 +119,15 @@ static int br_dev_init(struct net_device *dev)
 	return err;
 }
 
+static void br_dev_uninit(struct net_device *dev)
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	br_multicast_uninit_stats(br);
+	br_vlan_flush(br);
+	free_percpu(br->stats);
+}
+
 static int br_dev_open(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
@@ -332,6 +341,7 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_open		 = br_dev_open,
 	.ndo_stop		 = br_dev_stop,
 	.ndo_init		 = br_dev_init,
+	.ndo_uninit		 = br_dev_uninit,
 	.ndo_start_xmit		 = br_dev_xmit,
 	.ndo_get_stats64	 = br_get_stats64,
 	.ndo_set_mac_address	 = br_set_mac_address,
@@ -356,14 +366,6 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_features_check	 = passthru_features_check,
 };
 
-static void br_dev_free(struct net_device *dev)
-{
-	struct net_bridge *br = netdev_priv(dev);
-
-	free_percpu(br->stats);
-	free_netdev(dev);
-}
-
 static struct device_type br_type = {
 	.name	= "bridge",
 };
@@ -376,7 +378,7 @@ void br_dev_setup(struct net_device *dev)
 	ether_setup(dev);
 
 	dev->netdev_ops = &br_netdev_ops;
-	dev->destructor = br_dev_free;
+	dev->destructor = free_netdev;
 	dev->ethtool_ops = &br_ethtool_ops;
 	SET_NETDEV_DEVTYPE(dev, &br_type);
 	dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 8ac1770aa222..56a2a72e7738 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -311,7 +311,6 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)
 
 	br_fdb_delete_by_port(br, NULL, 0, 1);
 
-	br_vlan_flush(br);
 	br_multicast_dev_del(br);
 	cancel_delayed_work_sync(&br->gc_work);
 
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index b760f2620abf..faa7261a992f 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -2031,8 +2031,6 @@ void br_multicast_dev_del(struct net_bridge *br)
 
 out:
 	spin_unlock_bh(&br->multicast_lock);
-
-	free_percpu(br->mcast_stats);
 }
 
 int br_multicast_set_router(struct net_bridge *br, unsigned long val)
@@ -2531,6 +2529,11 @@ int br_multicast_init_stats(struct net_bridge *br)
 	return 0;
 }
 
+void br_multicast_uninit_stats(struct net_bridge *br)
+{
+	free_percpu(br->mcast_stats);
+}
+
 static void mcast_stats_add_dir(u64 *dst, u64 *src)
 {
 	dst[BR_MCAST_DIR_RX] += src[BR_MCAST_DIR_RX];
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 61368186edea..0d177280aa84 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -620,6 +620,7 @@ void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
 void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p,
 			const struct sk_buff *skb, u8 type, u8 dir);
 int br_multicast_init_stats(struct net_bridge *br);
+void br_multicast_uninit_stats(struct net_bridge *br);
 void br_multicast_get_stats(const struct net_bridge *br,
 			    const struct net_bridge_port *p,
 			    struct br_mcast_stats *dest);
@@ -760,6 +761,10 @@ static inline int br_multicast_init_stats(struct net_bridge *br)
 	return 0;
 }
 
+static inline void br_multicast_uninit_stats(struct net_bridge *br)
+{
+}
+
 static inline int br_multicast_igmp_type(const struct sk_buff *skb)
 {
 	return 0;

From 5b8d5429daa05bebef6ffd3297df3b502cc6f184 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 10 Apr 2017 14:59:28 +0300
Subject: [PATCH 365/410] bridge: netlink: register netdevice before executing
 changelink

Peter reported a kernel oops when executing the following command:

$ ip link add name test type bridge vlan_default_pvid 1

[13634.939408] BUG: unable to handle kernel NULL pointer dereference at
0000000000000190
[13634.939436] IP: __vlan_add+0x73/0x5f0
[...]
[13634.939783] Call Trace:
[13634.939791]  ? pcpu_next_unpop+0x3b/0x50
[13634.939801]  ? pcpu_alloc+0x3d2/0x680
[13634.939810]  ? br_vlan_add+0x135/0x1b0
[13634.939820]  ? __br_vlan_set_default_pvid.part.28+0x204/0x2b0
[13634.939834]  ? br_changelink+0x120/0x4e0
[13634.939844]  ? br_dev_newlink+0x50/0x70
[13634.939854]  ? rtnl_newlink+0x5f5/0x8a0
[13634.939864]  ? rtnl_newlink+0x176/0x8a0
[13634.939874]  ? mem_cgroup_commit_charge+0x7c/0x4e0
[13634.939886]  ? rtnetlink_rcv_msg+0xe1/0x220
[13634.939896]  ? lookup_fast+0x52/0x370
[13634.939905]  ? rtnl_newlink+0x8a0/0x8a0
[13634.939915]  ? netlink_rcv_skb+0xa1/0xc0
[13634.939925]  ? rtnetlink_rcv+0x24/0x30
[13634.939934]  ? netlink_unicast+0x177/0x220
[13634.939944]  ? netlink_sendmsg+0x2fe/0x3b0
[13634.939954]  ? _copy_from_user+0x39/0x40
[13634.939964]  ? sock_sendmsg+0x30/0x40
[13634.940159]  ? ___sys_sendmsg+0x29d/0x2b0
[13634.940326]  ? __alloc_pages_nodemask+0xdf/0x230
[13634.940478]  ? mem_cgroup_commit_charge+0x7c/0x4e0
[13634.940592]  ? mem_cgroup_try_charge+0x76/0x1a0
[13634.940701]  ? __handle_mm_fault+0xdb9/0x10b0
[13634.940809]  ? __sys_sendmsg+0x51/0x90
[13634.940917]  ? entry_SYSCALL_64_fastpath+0x1e/0xad

The problem is that the bridge's VLAN group is created after setting the
default PVID, when registering the netdevice and executing its
ndo_init().

Fix this by changing the order of both operations, so that
br_changelink() is only processed after the netdevice is registered,
when the VLAN group is already initialized.

Fixes: b6677449dff6 ("bridge: netlink: call br_changelink() during br_dev_newlink()")
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reported-by: Peter V. Saveliev <peter@svinota.eu>
Tested-by: Peter V. Saveliev <peter@svinota.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netlink.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index a8f6acd23e30..225ef7d53701 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1165,11 +1165,14 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
 		spin_unlock_bh(&br->lock);
 	}
 
-	err = br_changelink(dev, tb, data);
+	err = register_netdevice(dev);
 	if (err)
 		return err;
 
-	return register_netdevice(dev);
+	err = br_changelink(dev, tb, data);
+	if (err)
+		unregister_netdevice(dev);
+	return err;
 }
 
 static size_t br_get_size(const struct net_device *brdev)

From df7dd8fc965c665e83b71a649378cdf200ff36df Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 12 Apr 2017 09:32:07 +0200
Subject: [PATCH 366/410] net: xdp: don't export dev_change_xdp_fd()

Since dev_change_xdp_fd() is only used in rtnetlink, which must
be built-in, there's no reason to export dev_change_xdp_fd().

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 7869ae3837ca..533a6d6f6092 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6757,7 +6757,6 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
 
 	return err;
 }
-EXPORT_SYMBOL(dev_change_xdp_fd);
 
 /**
  *	dev_new_index	-	allocate an ifindex

From 45abdf35cf82e4270328c7237e7812de960ac560 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 12 Apr 2017 00:31:16 +0000
Subject: [PATCH 367/410] drm/etnaviv: fix missing unlock on error in
 etnaviv_gpu_submit()

Add the missing unlock before return from function etnaviv_gpu_submit()
in the error handling case.

lst: fixed label name.

Fixes: f3cd1b064f11 ("drm/etnaviv: (re-)protect fence allocation with
GPU mutex")
CC: stable@vger.kernel.org #4.9+
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index da48819ff2e6..b78d9239e48f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -1317,7 +1317,7 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 	if (!fence) {
 		event_free(gpu, event);
 		ret = -ENOMEM;
-		goto out_pm_put;
+		goto out_unlock;
 	}
 
 	gpu->event[event].fence = fence;
@@ -1357,6 +1357,7 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 	hangcheck_timer_reset(gpu);
 	ret = 0;
 
+out_unlock:
 	mutex_unlock(&gpu->lock);
 
 out_pm_put:

From a2d6cbb0670d54806f18192cb0db266b4a6d285a Mon Sep 17 00:00:00 2001
From: Rabin Vincent <rabinv@axis.com>
Date: Mon, 10 Apr 2017 08:36:39 +0200
Subject: [PATCH 368/410] ipv6: Fix idev->addr_list corruption

addrconf_ifdown() removes elements from the idev->addr_list without
holding the idev->lock.

If this happens while the loop in __ipv6_dev_get_saddr() is handling the
same element, that function ends up in an infinite loop:

  NMI watchdog: BUG: soft lockup - CPU#1 stuck for 23s! [test:1719]
  Call Trace:
   ipv6_get_saddr_eval+0x13c/0x3a0
   __ipv6_dev_get_saddr+0xe4/0x1f0
   ipv6_dev_get_saddr+0x1b4/0x204
   ip6_dst_lookup_tail+0xcc/0x27c
   ip6_dst_lookup_flow+0x38/0x80
   udpv6_sendmsg+0x708/0xba8
   sock_sendmsg+0x18/0x30
   SyS_sendto+0xb8/0xf8
   syscall_common+0x34/0x58

Fixes: 6a923934c33 (Revert "ipv6: Revert optional address flusing on ifdown.")
Signed-off-by: Rabin Vincent <rabinv@axis.com>
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 363172527e43..80ce478c4851 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3626,14 +3626,19 @@ restart:
 	INIT_LIST_HEAD(&del_list);
 	list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
 		struct rt6_info *rt = NULL;
+		bool keep;
 
 		addrconf_del_dad_work(ifa);
 
+		keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
+			!addr_is_local(&ifa->addr);
+		if (!keep)
+			list_move(&ifa->if_list, &del_list);
+
 		write_unlock_bh(&idev->lock);
 		spin_lock_bh(&ifa->lock);
 
-		if (keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
-		    !addr_is_local(&ifa->addr)) {
+		if (keep) {
 			/* set state to skip the notifier below */
 			state = INET6_IFADDR_STATE_DEAD;
 			ifa->state = 0;
@@ -3645,8 +3650,6 @@ restart:
 		} else {
 			state = ifa->state;
 			ifa->state = INET6_IFADDR_STATE_DEAD;
-
-			list_move(&ifa->if_list, &del_list);
 		}
 
 		spin_unlock_bh(&ifa->lock);

From a4866aa812518ed1a37d8ea0c881dc946409de94 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 5 Apr 2017 09:39:08 -0700
Subject: [PATCH 369/410] mm: Tighten x86 /dev/mem with zeroing reads

Under CONFIG_STRICT_DEVMEM, reading System RAM through /dev/mem is
disallowed. However, on x86, the first 1MB was always allowed for BIOS
and similar things, regardless of it actually being System RAM. It was
possible for heap to end up getting allocated in low 1MB RAM, and then
read by things like x86info or dd, which would trip hardened usercopy:

usercopy: kernel memory exposure attempt detected from ffff880000090000 (dma-kmalloc-256) (4096 bytes)

This changes the x86 exception for the low 1MB by reading back zeros for
System RAM areas instead of blindly allowing them. More work is needed to
extend this to mmap, but currently mmap doesn't go through usercopy, so
hardened usercopy won't Oops the kernel.

Reported-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Tested-by: Tommi Rantala <tommi.t.rantala@nokia.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/x86/mm/init.c | 41 ++++++++++++++++-------
 drivers/char/mem.c | 82 +++++++++++++++++++++++++++++-----------------
 2 files changed, 82 insertions(+), 41 deletions(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 22af912d66d2..889e7619a091 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -643,21 +643,40 @@ void __init init_mem_mapping(void)
  * devmem_is_allowed() checks to see if /dev/mem access to a certain address
  * is valid. The argument is a physical page number.
  *
- *
- * On x86, access has to be given to the first megabyte of ram because that area
- * contains BIOS code and data regions used by X and dosemu and similar apps.
- * Access has to be given to non-kernel-ram areas as well, these contain the PCI
- * mmio resources as well as potential bios/acpi data regions.
+ * On x86, access has to be given to the first megabyte of RAM because that
+ * area traditionally contains BIOS code and data regions used by X, dosemu,
+ * and similar apps. Since they map the entire memory range, the whole range
+ * must be allowed (for mapping), but any areas that would otherwise be
+ * disallowed are flagged as being "zero filled" instead of rejected.
+ * Access has to be given to non-kernel-ram areas as well, these contain the
+ * PCI mmio resources as well as potential bios/acpi data regions.
  */
 int devmem_is_allowed(unsigned long pagenr)
 {
-	if (pagenr < 256)
-		return 1;
-	if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
+	if (page_is_ram(pagenr)) {
+		/*
+		 * For disallowed memory regions in the low 1MB range,
+		 * request that the page be shown as all zeros.
+		 */
+		if (pagenr < 256)
+			return 2;
+
 		return 0;
-	if (!page_is_ram(pagenr))
-		return 1;
-	return 0;
+	}
+
+	/*
+	 * This must follow RAM test, since System RAM is considered a
+	 * restricted resource under CONFIG_STRICT_IOMEM.
+	 */
+	if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) {
+		/* Low 1MB bypasses iomem restrictions. */
+		if (pagenr < 256)
+			return 1;
+
+		return 0;
+	}
+
+	return 1;
 }
 
 void free_init_pages(char *what, unsigned long begin, unsigned long end)
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 6d9cc2d39d22..7e4a9d1296bb 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -60,6 +60,10 @@ static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
 #endif
 
 #ifdef CONFIG_STRICT_DEVMEM
+static inline int page_is_allowed(unsigned long pfn)
+{
+	return devmem_is_allowed(pfn);
+}
 static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 {
 	u64 from = ((u64)pfn) << PAGE_SHIFT;
@@ -75,6 +79,10 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 	return 1;
 }
 #else
+static inline int page_is_allowed(unsigned long pfn)
+{
+	return 1;
+}
 static inline int range_is_allowed(unsigned long pfn, unsigned long size)
 {
 	return 1;
@@ -122,23 +130,31 @@ static ssize_t read_mem(struct file *file, char __user *buf,
 
 	while (count > 0) {
 		unsigned long remaining;
+		int allowed;
 
 		sz = size_inside_page(p, count);
 
-		if (!range_is_allowed(p >> PAGE_SHIFT, count))
+		allowed = page_is_allowed(p >> PAGE_SHIFT);
+		if (!allowed)
 			return -EPERM;
+		if (allowed == 2) {
+			/* Show zeros for restricted memory. */
+			remaining = clear_user(buf, sz);
+		} else {
+			/*
+			 * On ia64 if a page has been mapped somewhere as
+			 * uncached, then it must also be accessed uncached
+			 * by the kernel or data corruption may occur.
+			 */
+			ptr = xlate_dev_mem_ptr(p);
+			if (!ptr)
+				return -EFAULT;
 
-		/*
-		 * On ia64 if a page has been mapped somewhere as uncached, then
-		 * it must also be accessed uncached by the kernel or data
-		 * corruption may occur.
-		 */
-		ptr = xlate_dev_mem_ptr(p);
-		if (!ptr)
-			return -EFAULT;
+			remaining = copy_to_user(buf, ptr, sz);
+
+			unxlate_dev_mem_ptr(p, ptr);
+		}
 
-		remaining = copy_to_user(buf, ptr, sz);
-		unxlate_dev_mem_ptr(p, ptr);
 		if (remaining)
 			return -EFAULT;
 
@@ -181,30 +197,36 @@ static ssize_t write_mem(struct file *file, const char __user *buf,
 #endif
 
 	while (count > 0) {
+		int allowed;
+
 		sz = size_inside_page(p, count);
 
-		if (!range_is_allowed(p >> PAGE_SHIFT, sz))
+		allowed = page_is_allowed(p >> PAGE_SHIFT);
+		if (!allowed)
 			return -EPERM;
 
-		/*
-		 * On ia64 if a page has been mapped somewhere as uncached, then
-		 * it must also be accessed uncached by the kernel or data
-		 * corruption may occur.
-		 */
-		ptr = xlate_dev_mem_ptr(p);
-		if (!ptr) {
-			if (written)
-				break;
-			return -EFAULT;
-		}
+		/* Skip actual writing when a page is marked as restricted. */
+		if (allowed == 1) {
+			/*
+			 * On ia64 if a page has been mapped somewhere as
+			 * uncached, then it must also be accessed uncached
+			 * by the kernel or data corruption may occur.
+			 */
+			ptr = xlate_dev_mem_ptr(p);
+			if (!ptr) {
+				if (written)
+					break;
+				return -EFAULT;
+			}
 
-		copied = copy_from_user(ptr, buf, sz);
-		unxlate_dev_mem_ptr(p, ptr);
-		if (copied) {
-			written += sz - copied;
-			if (written)
-				break;
-			return -EFAULT;
+			copied = copy_from_user(ptr, buf, sz);
+			unxlate_dev_mem_ptr(p, ptr);
+			if (copied) {
+				written += sz - copied;
+				if (written)
+					break;
+				return -EFAULT;
+			}
 		}
 
 		buf += sz;

From 956a4cd2c957acf638ff29951aabaa9d8e92bbc2 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 7 Apr 2017 16:42:08 -0700
Subject: [PATCH 370/410] device-dax: switch to srcu, fix rcu_read_lock() vs
 pte allocation

The following warning triggers with a new unit test that stresses the
device-dax interface.

 ===============================
 [ ERR: suspicious RCU usage.  ]
 4.11.0-rc4+ #1049 Tainted: G           O
 -------------------------------
 ./include/linux/rcupdate.h:521 Illegal context switch in RCU read-side critical section!

 other info that might help us debug this:

 rcu_scheduler_active = 2, debug_locks = 0
 2 locks held by fio/9070:
  #0:  (&mm->mmap_sem){++++++}, at: [<ffffffff8d0739d7>] __do_page_fault+0x167/0x4f0
  #1:  (rcu_read_lock){......}, at: [<ffffffffc03fbd02>] dax_dev_huge_fault+0x32/0x620 [dax]

 Call Trace:
  dump_stack+0x86/0xc3
  lockdep_rcu_suspicious+0xd7/0x110
  ___might_sleep+0xac/0x250
  __might_sleep+0x4a/0x80
  __alloc_pages_nodemask+0x23a/0x360
  alloc_pages_current+0xa1/0x1f0
  pte_alloc_one+0x17/0x80
  __pte_alloc+0x1e/0x120
  __get_locked_pte+0x1bf/0x1d0
  insert_pfn.isra.70+0x3a/0x100
  ? lookup_memtype+0xa6/0xd0
  vm_insert_mixed+0x64/0x90
  dax_dev_huge_fault+0x520/0x620 [dax]
  ? dax_dev_huge_fault+0x32/0x620 [dax]
  dax_dev_fault+0x10/0x20 [dax]
  __do_fault+0x1e/0x140
  __handle_mm_fault+0x9af/0x10d0
  handle_mm_fault+0x16d/0x370
  ? handle_mm_fault+0x47/0x370
  __do_page_fault+0x28c/0x4f0
  trace_do_page_fault+0x58/0x2a0
  do_async_page_fault+0x1a/0xa0
  async_page_fault+0x28/0x30

Inserting a page table entry may trigger an allocation while we are
holding a read lock to keep the device instance alive for the duration
of the fault. Use srcu for this keep-alive protection.

Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap")
Cc: <stable@vger.kernel.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/Kconfig |  1 +
 drivers/dax/dax.c   | 13 +++++++------
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index 3e2ab3b14eea..9e95bf94eb13 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -2,6 +2,7 @@ menuconfig DEV_DAX
 	tristate "DAX: direct access to differentiated memory"
 	default m if NVDIMM_DAX
 	depends on TRANSPARENT_HUGEPAGE
+	select SRCU
 	help
 	  Support raw access to differentiated (persistence, bandwidth,
 	  latency...) memory via an mmap(2) capable character
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index 80c6db279ae1..806f180c80d8 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -25,6 +25,7 @@
 #include "dax.h"
 
 static dev_t dax_devt;
+DEFINE_STATIC_SRCU(dax_srcu);
 static struct class *dax_class;
 static DEFINE_IDA(dax_minor_ida);
 static int nr_dax = CONFIG_NR_DEV_DAX;
@@ -60,7 +61,7 @@ struct dax_region {
  * @region - parent region
  * @dev - device backing the character device
  * @cdev - core chardev data
- * @alive - !alive + rcu grace period == no new mappings can be established
+ * @alive - !alive + srcu grace period == no new mappings can be established
  * @id - child id in the region
  * @num_resources - number of physical address extents in this device
  * @res - array of physical address ranges
@@ -569,7 +570,7 @@ static int __dax_dev_pud_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
 static int dax_dev_huge_fault(struct vm_fault *vmf,
 		enum page_entry_size pe_size)
 {
-	int rc;
+	int rc, id;
 	struct file *filp = vmf->vma->vm_file;
 	struct dax_dev *dax_dev = filp->private_data;
 
@@ -578,7 +579,7 @@ static int dax_dev_huge_fault(struct vm_fault *vmf,
 			? "write" : "read",
 			vmf->vma->vm_start, vmf->vma->vm_end);
 
-	rcu_read_lock();
+	id = srcu_read_lock(&dax_srcu);
 	switch (pe_size) {
 	case PE_SIZE_PTE:
 		rc = __dax_dev_pte_fault(dax_dev, vmf);
@@ -592,7 +593,7 @@ static int dax_dev_huge_fault(struct vm_fault *vmf,
 	default:
 		return VM_FAULT_FALLBACK;
 	}
-	rcu_read_unlock();
+	srcu_read_unlock(&dax_srcu, id);
 
 	return rc;
 }
@@ -713,11 +714,11 @@ static void unregister_dax_dev(void *dev)
 	 * Note, rcu is not protecting the liveness of dax_dev, rcu is
 	 * ensuring that any fault handlers that might have seen
 	 * dax_dev->alive == true, have completed.  Any fault handlers
-	 * that start after synchronize_rcu() has started will abort
+	 * that start after synchronize_srcu() has started will abort
 	 * upon seeing dax_dev->alive == false.
 	 */
 	dax_dev->alive = false;
-	synchronize_rcu();
+	synchronize_srcu(&dax_srcu);
 	unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1);
 	cdev_del(cdev);
 	device_unregister(dev);

From 11e63f6d920d6f2dfd3cd421e939a4aec9a58dcd Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 6 Apr 2017 09:04:31 -0700
Subject: [PATCH 371/410] x86, pmem: fix broken __copy_user_nocache
 cache-bypass assumptions

Before we rework the "pmem api" to stop abusing __copy_user_nocache()
for memcpy_to_pmem() we need to fix cases where we may strand dirty data
in the cpu cache. The problem occurs when copy_from_iter_pmem() is used
for arbitrary data transfers from userspace. There is no guarantee that
these transfers, performed by dax_iomap_actor(), will have aligned
destinations or aligned transfer lengths. Backstop the usage
__copy_user_nocache() with explicit cache management in these unaligned
cases.

Yes, copy_from_iter_pmem() is now too big for an inline, but addressing
that is saved for a later patch that moves the entirety of the "pmem
api" into the pmem driver directly.

Fixes: 5de490daec8b ("pmem: add copy_from_iter_pmem() and clear_pmem()")
Cc: <stable@vger.kernel.org>
Cc: <x86@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/x86/include/asm/pmem.h | 42 +++++++++++++++++++++++++++----------
 1 file changed, 31 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 2c1ebeb4d737..529bb4a6487a 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -55,7 +55,8 @@ static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
  * @size:	number of bytes to write back
  *
  * Write back a cache range using the CLWB (cache line write back)
- * instruction.
+ * instruction. Note that @size is internally rounded up to be cache
+ * line size aligned.
  */
 static inline void arch_wb_cache_pmem(void *addr, size_t size)
 {
@@ -69,15 +70,6 @@ static inline void arch_wb_cache_pmem(void *addr, size_t size)
 		clwb(p);
 }
 
-/*
- * copy_from_iter_nocache() on x86 only uses non-temporal stores for iovec
- * iterators, so for other types (bvec & kvec) we must do a cache write-back.
- */
-static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
-{
-	return iter_is_iovec(i) == false;
-}
-
 /**
  * arch_copy_from_iter_pmem - copy data from an iterator to PMEM
  * @addr:	PMEM destination address
@@ -94,7 +86,35 @@ static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
 	/* TODO: skip the write-back by always using non-temporal stores */
 	len = copy_from_iter_nocache(addr, bytes, i);
 
-	if (__iter_needs_pmem_wb(i))
+	/*
+	 * In the iovec case on x86_64 copy_from_iter_nocache() uses
+	 * non-temporal stores for the bulk of the transfer, but we need
+	 * to manually flush if the transfer is unaligned. A cached
+	 * memory copy is used when destination or size is not naturally
+	 * aligned. That is:
+	 *   - Require 8-byte alignment when size is 8 bytes or larger.
+	 *   - Require 4-byte alignment when size is 4 bytes.
+	 *
+	 * In the non-iovec case the entire destination needs to be
+	 * flushed.
+	 */
+	if (iter_is_iovec(i)) {
+		unsigned long flushed, dest = (unsigned long) addr;
+
+		if (bytes < 8) {
+			if (!IS_ALIGNED(dest, 4) || (bytes != 4))
+				arch_wb_cache_pmem(addr, 1);
+		} else {
+			if (!IS_ALIGNED(dest, 8)) {
+				dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
+				arch_wb_cache_pmem(addr, 1);
+			}
+
+			flushed = dest - (unsigned long) addr;
+			if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8))
+				arch_wb_cache_pmem(addr + bytes - 1, 1);
+		}
+	} else
 		arch_wb_cache_pmem(addr, bytes);
 
 	return len;

From 818249216d7dc9340a7d2332f6d07b462e818ee5 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Sat, 4 Mar 2017 17:23:07 -0500
Subject: [PATCH 372/410] tools/power turbostat: show missing Core and GFX
 power on SKL and KBL

While the current SDM is silent on the matter, the Core and GFX
RAPL power meters on SKL and KBL appear to work -- so show them.

Reported-by: Yaroslav Isakov <yaroslav.isakov@gmail.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index d7fb6bcb2744..94d7e54b1ec0 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -3355,17 +3355,19 @@ void rapl_probe(unsigned int family, unsigned int model)
 	case INTEL_FAM6_SKYLAKE_DESKTOP:	/* SKL */
 	case INTEL_FAM6_KABYLAKE_MOBILE:	/* KBL */
 	case INTEL_FAM6_KABYLAKE_DESKTOP:	/* KBL */
-		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
+		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
 		BIC_PRESENT(BIC_PKG__);
 		BIC_PRESENT(BIC_RAM__);
 		if (rapl_joules) {
 			BIC_PRESENT(BIC_Pkg_J);
 			BIC_PRESENT(BIC_Cor_J);
 			BIC_PRESENT(BIC_RAM_J);
+			BIC_PRESENT(BIC_GFX_J);
 		} else {
 			BIC_PRESENT(BIC_PkgWatt);
 			BIC_PRESENT(BIC_CorWatt);
 			BIC_PRESENT(BIC_RAMWatt);
+			BIC_PRESENT(BIC_GFXWatt);
 		}
 		break;
 	case INTEL_FAM6_HASWELL_X:	/* HSX */

From f4896fa502b81c5bce93f375bd17b14725c01826 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Sat, 4 Mar 2017 18:10:45 -0500
Subject: [PATCH 373/410] tools/power turbostat: enable package THERM_INTERRUPT
 dump

cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00641400 (100 C)
cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884b0800 (25 C)
cpu0: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x00000003 (100 C, 100 C)

Enable the same per-core output, but hide it behind --debug
because it is too verbose on big systems.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 94d7e54b1ec0..e95799966560 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -3482,7 +3482,7 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model)
 int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
 	unsigned long long msr;
-	unsigned int dts;
+	unsigned int dts, dts2;
 	int cpu;
 
 	if (!(do_dts || do_ptm))
@@ -3507,7 +3507,6 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
 		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
 			cpu, msr, tcc_activation_temp - dts);
 
-#ifdef	THERM_DEBUG
 		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
 			return 0;
 
@@ -3515,11 +3514,10 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
 		dts2 = (msr >> 8) & 0x7F;
 		fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
 			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
-#endif
 	}
 
 
-	if (do_dts) {
+	if (do_dts && debug) {
 		unsigned int resolution;
 
 		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
@@ -3530,7 +3528,6 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
 		fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
 			cpu, msr, tcc_activation_temp - dts, resolution);
 
-#ifdef THERM_DEBUG
 		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
 			return 0;
 
@@ -3538,7 +3535,6 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
 		dts2 = (msr >> 8) & 0x7F;
 		fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
 			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
-#endif
 	}
 
 	return 0;

From 6dbd25a24599db07d88a805615bad1f4d48a5749 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Sat, 4 Mar 2017 18:18:28 -0500
Subject: [PATCH 374/410] tools/power turbostat: update HWP dump to decimal
 from hex

Syntax only.

The HWP CAPABILTIES and REQUEST ratios are more easily
viewed in decimal -- just multiply by 100 and you get MHz...

new:
cpu0: MSR_HWP_CAPABILITIES: 0x010c1b23 (high 35 guar 27 eff 12 low 1)
cpu0: MSR_HWP_REQUEST: 0x80002301 (min 1 max 35 des 0 epp 0x80 window 0x0 pkg 0x0)

old:
cpu0: MSR_HWP_CAPABILITIES: 0x010c1b23 (high 0x23 guar 0x1b eff 0xc low 0x1)
cpu0: MSR_HWP_REQUEST: 0x80002301 (min 0x1 max 0x23 des 0x0 epp 0x80 window 0x0 pkg 0x0)

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index e95799966560..b0591d0da801 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -3113,7 +3113,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		return 0;
 
 	fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
-			"(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n",
+			"(high %d guar %d eff %d low %d)\n",
 			cpu, msr,
 			(unsigned int)HWP_HIGHEST_PERF(msr),
 			(unsigned int)HWP_GUARANTEED_PERF(msr),
@@ -3124,7 +3124,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 		return 0;
 
 	fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
-			"(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n",
+			"(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
 			cpu, msr,
 			(unsigned int)(((msr) >> 0) & 0xff),
 			(unsigned int)(((msr) >> 8) & 0xff),
@@ -3138,7 +3138,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 			return 0;
 
 		fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
-			"(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n",
+			"(min %d max %d des %d epp 0x%x window 0x%x)\n",
 			cpu, msr,
 			(unsigned int)(((msr) >> 0) & 0xff),
 			(unsigned int)(((msr) >> 8) & 0xff),

From ab23d1146a8e7bd045507fe8a380827dc03e056d Mon Sep 17 00:00:00 2001
From: Doug Smythies <doug.smythies@gmail.com>
Date: Sat, 4 Mar 2017 14:48:49 -0800
Subject: [PATCH 375/410] tools/power turbostat: turbostat.8 add missing column
 definitions

Add GFX%rc6 and GFXMHz to the column descriptions section
of the turbostat man page.

Signed-off-by: Doug Smythies <dsmythies@telus.net>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.8 | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index fedca3285326..ccf2a69365cc 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -100,6 +100,8 @@ The system configuration dump (if --quiet is not used) is followed by statistics
 \fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states.  These numbers are from hardware residency counters.
 \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
 \fBPkgTtmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
+\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms.
+\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz.
 \fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states.  These numbers are from hardware residency counters.
 \fBPkgWatt\fP Watts consumed by the whole package.
 \fBCorWatt\fP Watts consumed by the core part of the package.

From 95149369c1c28b10f7318dfde54018ab107277d0 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Wed, 12 Apr 2017 19:44:51 -0400
Subject: [PATCH 376/410] tools/power turbostat: fix impossibly large CPU%c1
 value

Most CPUs do not have a hardware c1 counter,
and so turbostat derives c1 residency:

c1 = TSC - MPERF - other_core_cstate_counters

As it is not possible to atomically read these coutners,
measurement jitter can case this calcuation to "go negative"
when very close to 0.  Turbostat detect that case and
simply prints c1 = 0.00%

But that check neglected to account for systems where the TSC
crystal clock domain and the MPERF BCLK domain are differ by
a small amount.  That allowed very small negative c1 numbers
to escape this check and be printed as huge positve numbers.

This code begs for a bit of cleanup, but this patch
is the minimal change to fix the issue.

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index b0591d0da801..0ad966114e58 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1142,7 +1142,7 @@ delta_thread(struct thread_data *new, struct thread_data *old,
 		 * it is possible for mperf's non-halted cycles + idle states
 		 * to exceed TSC's all cycles: show c1 = 0% in that case.
 		 */
-		if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
+		if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
 			old->c1 = 0;
 		else {
 			/* normal case, derive c1 */

From 5f9bf02a58f0f62d111994805212d0a775499862 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Sat, 4 Mar 2017 17:26:29 -0500
Subject: [PATCH 377/410] tools/power turbostat: update version number

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 0ad966114e58..b11294730771 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -4578,7 +4578,7 @@ int get_and_dump_counters(void)
 }
 
 void print_version() {
-	fprintf(outf, "turbostat version 17.02.24"
+	fprintf(outf, "turbostat version 17.04.12"
 		" - Len Brown <lenb@kernel.org>\n");
 }
 

From c4a3fa261b16858416f1fd7db03a33d7ef5fc0b3 Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Sun, 9 Apr 2017 13:45:16 +0800
Subject: [PATCH 378/410] cpufreq: Bring CPUs up even if cpufreq_online()
 failed

There is a report that after commit 27622b061eb4 ("cpufreq: Convert
to hotplug state machine"), the normal CPU offline/online cycle
fails on some platforms.

According to the ftrace result, this problem was triggered on
platforms using acpi-cpufreq as the default cpufreq driver,
and due to the lack of some ACPI freq method (eg. _PCT),
cpufreq_online() failed and returned a negative value, so the CPU
hotplug state machine rolled back the CPU online process.  Actually,
from the user's perspective, the failure of cpufreq_online() should
not prevent that CPU from being brought up, although cpufreq might
not work on that CPU.

BTW, during system startup cpufreq_online() is not invoked via CPU
online but by the cpufreq device creation process, so the APs can be
brought up even though cpufreq_online() fails in that stage.

This patch ignores the return value of cpufreq_online/offline() and
lets the cpufreq framework deal with the failure.  cpufreq_online()
itself will do a proper rollback in that case and if _PCT is missing,
the ACPI cpufreq driver will print a warning if the corresponding
debug options have been enabled.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=194581
Fixes: 27622b061eb4 ("cpufreq: Convert to hotplug state machine")
Reported-and-tested-by: Tomasz Maciej Nowak <tmn505@gmail.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Cc: 4.9+ <stable@vger.kernel.org> # 4.9+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index bc96d423781a..0e3f6496524d 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2398,6 +2398,20 @@ EXPORT_SYMBOL_GPL(cpufreq_boost_enabled);
  *********************************************************************/
 static enum cpuhp_state hp_online;
 
+static int cpuhp_cpufreq_online(unsigned int cpu)
+{
+	cpufreq_online(cpu);
+
+	return 0;
+}
+
+static int cpuhp_cpufreq_offline(unsigned int cpu)
+{
+	cpufreq_offline(cpu);
+
+	return 0;
+}
+
 /**
  * cpufreq_register_driver - register a CPU Frequency driver
  * @driver_data: A struct cpufreq_driver containing the values#
@@ -2460,8 +2474,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 	}
 
 	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "cpufreq:online",
-					cpufreq_online,
-					cpufreq_offline);
+					cpuhp_cpufreq_online,
+					cpuhp_cpufreq_offline);
 	if (ret < 0)
 		goto err_if_unreg;
 	hp_online = ret;

From f406270bf73d71ea7b35ee3f7a08a44f6594c9b1 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 11 Apr 2017 00:23:42 +0200
Subject: [PATCH 379/410] ACPI / scan: Set the visited flag for all enumerated
 devices

Commit 10c7e20b2ff3 (ACPI / scan: fix enumeration (visited) flags for
bus rescans) attempted to fix a problem with ACPI-based enumerateion
of I2C/SPI devices, but it forgot to ensure that the visited flag
will be set for all of the other enumerated devices, so fix that.

Fixes: 10c7e20b2ff3 (ACPI / scan: fix enumeration (visited) flags for bus rescans)
Link: https://bugzilla.kernel.org/show_bug.cgi?id=194885
Reported-and-tested-by: Kevin Locke <kevin@kevinlocke.name>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: 4.8+ <stable@vger.kernel.org> # 4.8+
---
 drivers/acpi/scan.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 192691880d55..2433569b02ef 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1857,15 +1857,20 @@ static void acpi_bus_attach(struct acpi_device *device)
 		return;
 
 	device->flags.match_driver = true;
-	if (!ret) {
-		ret = device_attach(&device->dev);
-		if (ret < 0)
-			return;
-
-		if (!ret && device->pnp.type.platform_id)
-			acpi_default_enumeration(device);
+	if (ret > 0) {
+		acpi_device_set_enumerated(device);
+		goto ok;
 	}
 
+	ret = device_attach(&device->dev);
+	if (ret < 0)
+		return;
+
+	if (ret > 0 || !device->pnp.type.platform_id)
+		acpi_device_set_enumerated(device);
+	else
+		acpi_default_enumeration(device);
+
  ok:
 	list_for_each_entry(child, &device->children, node)
 		acpi_bus_attach(child);

From 6f6266a561306e206e0e31a5038f029b6a7b1d89 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Wed, 12 Apr 2017 16:27:19 +0100
Subject: [PATCH 380/410] x86/efi: Don't try to reserve runtime regions

Reserving a runtime region results in splitting the EFI memory
descriptors for the runtime region. This results in runtime region
descriptors with bogus memory mappings, leading to interesting crashes
like the following during a kexec:

  general protection fault: 0000 [#1] SMP
  Modules linked in:
  CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.11.0-rc1 #53
  Hardware name: Wiwynn Leopard-Orv2/Leopard-DDR BW, BIOS LBM05   09/30/2016
  RIP: 0010:virt_efi_set_variable()
  ...
  Call Trace:
   efi_delete_dummy_variable()
   efi_enter_virtual_mode()
   start_kernel()
   ? set_init_arg()
   x86_64_start_reservations()
   x86_64_start_kernel()
   start_cpu()
  ...
  Kernel panic - not syncing: Fatal exception

Runtime regions will not be freed and do not need to be reserved, so
skip the memmap modification in this case.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Cc: <stable@vger.kernel.org> # v4.9+
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Dave Young <dyoung@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Jones <pjones@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Fixes: 8e80632fb23f ("efi/esrt: Use efi_mem_reserve() and avoid a kmalloc()")
Link: http://lkml.kernel.org/r/20170412152719.9779-2-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/efi/quirks.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 30031d5293c4..cdfe8c628959 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -201,6 +201,10 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
 		return;
 	}
 
+	/* No need to reserve regions that will never be freed. */
+	if (md.attribute & EFI_MEMORY_RUNTIME)
+		return;
+
 	size += addr % EFI_PAGE_SIZE;
 	size = round_up(size, EFI_PAGE_SIZE);
 	addr = round_down(addr, EFI_PAGE_SIZE);

From 4cca0457686e4ee1677d69469e4ddfd94d389a80 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Tue, 11 Apr 2017 00:29:44 +0100
Subject: [PATCH 381/410] cpupower: Fix turbo frequency reporting for pre-Sandy
 Bridge cores

The switch that conditionally sets CPUPOWER_CAP_HAS_TURBO_RATIO and
CPUPOWER_CAP_IS_SNB flags is missing a break, so all cores get both
flags set and an assumed base clock of 100 MHz for turbo values.

Reported-by: GSR <gsr.bugs@infernal-iceberg.com>
Tested-by: GSR <gsr.bugs@infernal-iceberg.com>
References: https://bugs.debian.org/859978
Fixes: 8fb2e440b223 (cpupower: Show Intel turbo ratio support via ...)
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 tools/power/cpupower/utils/helpers/cpuid.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c
index 93b0aa74ca03..39c2c7d067bb 100644
--- a/tools/power/cpupower/utils/helpers/cpuid.c
+++ b/tools/power/cpupower/utils/helpers/cpuid.c
@@ -156,6 +156,7 @@ out:
 					 */
 			case 0x2C:	/* Westmere EP - Gulftown */
 				cpu_info->caps |= CPUPOWER_CAP_HAS_TURBO_RATIO;
+				break;
 			case 0x2A:	/* SNB */
 			case 0x2D:	/* SNB Xeon */
 			case 0x3A:	/* IVB */

From 67dbea2ce6873f8ba57988ba3e608e8bf61c347f Mon Sep 17 00:00:00 2001
From: Pavel Shilovsky <pshilov@microsoft.com>
Date: Wed, 12 Apr 2017 13:32:07 -0700
Subject: [PATCH 382/410] CIFS: Fix SMB3 mount without specifying a security
 mechanism

Commit ef65aaede23f ("smb2: Enforce sec= mount option") changed the
behavior of a mount command to enforce a specified security mechanism
during mounting. On another hand according to the spec if SMB3 server
doesn't respond with a security context it implies that it supports
NTLMSSP. The current code doesn't keep it in mind and fails a mount
for such servers if no security mechanism is specified. Fix this by
indicating that a server supports NTLMSSP if a security context isn't
returned during negotiate phase. This allows the code to use NTLMSSP
by default for SMB3 mounts.

Signed-off-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Steve French <smfrench@gmail.com>
---
 fs/cifs/smb2pdu.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 1bd5d3033fc8..02da648041fc 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -562,8 +562,10 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 	 * but for time being this is our only auth choice so doesn't matter.
 	 * We just found a server which sets blob length to zero expecting raw.
 	 */
-	if (blob_length == 0)
+	if (blob_length == 0) {
 		cifs_dbg(FYI, "missing security blob on negprot\n");
+		server->sec_ntlmssp = true;
+	}
 
 	rc = cifs_enable_signing(server, ses->sign);
 	if (rc)

From 1315f01632da417f1f27074bc6631df8eaf223bb Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 13 Apr 2017 18:14:55 +0200
Subject: [PATCH 383/410] Revert "ACPICA: Resources: Not a valid resource if
 buffer length too long"

Revert commit 57707a9a7780 (ACPICA: Resources: Not a valid resource if
buffer length too long) as it is reported to prevent the TPM module
from loading on Lenovo X60 with Coreboot.

It also causes new confusing warnings to show up in the kernel log.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=195311
Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/utresrc.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/acpi/acpica/utresrc.c b/drivers/acpi/acpica/utresrc.c
index c86bae7b1d0f..ff096d9755b9 100644
--- a/drivers/acpi/acpica/utresrc.c
+++ b/drivers/acpi/acpica/utresrc.c
@@ -421,10 +421,8 @@ acpi_ut_walk_aml_resources(struct acpi_walk_state *walk_state,
 
 	ACPI_FUNCTION_TRACE(ut_walk_aml_resources);
 
-	/*
-	 * The absolute minimum resource template is one end_tag descriptor.
-	 * However, we will treat a lone end_tag as just a simple buffer.
-	 */
+	/* The absolute minimum resource template is one end_tag descriptor */
+
 	if (aml_length < sizeof(struct aml_resource_end_tag)) {
 		return_ACPI_STATUS(AE_AML_NO_RESOURCE_END_TAG);
 	}
@@ -456,8 +454,9 @@ acpi_ut_walk_aml_resources(struct acpi_walk_state *walk_state,
 		/* Invoke the user function */
 
 		if (user_function) {
-			status = user_function(aml, length, offset,
-					       resource_index, context);
+			status =
+			    user_function(aml, length, offset, resource_index,
+					  context);
 			if (ACPI_FAILURE(status)) {
 				return_ACPI_STATUS(status);
 			}
@@ -481,12 +480,6 @@ acpi_ut_walk_aml_resources(struct acpi_walk_state *walk_state,
 				*context = aml;
 			}
 
-			/* Check if buffer is defined to be longer than the resource length */
-
-			if (aml_length > (offset + length)) {
-				return_ACPI_STATUS(AE_AML_NO_RESOURCE_END_TAG);
-			}
-
 			/* Normal exit */
 
 			return_ACPI_STATUS(AE_OK);

From 14cf4a771b3098e431d2677e3533bdd962e478d8 Mon Sep 17 00:00:00 2001
From: Daniele Palmas <dnlplm@gmail.com>
Date: Mon, 10 Apr 2017 17:34:23 +0200
Subject: [PATCH 384/410] drivers: net: usb: qmi_wwan: add QMI_QUIRK_SET_DTR
 for Telit PID 0x1201

Telit LE920A4 uses the same pid 0x1201 of LE920, but modem
implementation is different, since it requires DTR to be set for
answering to qmi messages.

This patch replaces QMI_FIXED_INTF with QMI_QUIRK_SET_DTR: tests on
LE920 have been performed in order to verify backward compatibility.

Signed-off-by: Daniele Palmas <dnlplm@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/qmi_wwan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 156f7f85e486..2474618404f5 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -908,7 +908,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x2357, 0x9000, 4)},	/* TP-LINK MA260 */
 	{QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)},	/* Telit LE922A */
 	{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)},	/* Telit LE920 */
-	{QMI_FIXED_INTF(0x1bc7, 0x1201, 2)},	/* Telit LE920 */
+	{QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)},	/* Telit LE920, LE920A4 */
 	{QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)},	/* XS Stick W100-2 from 4G Systems */
 	{QMI_FIXED_INTF(0x0b3c, 0xc000, 4)},	/* Olivetti Olicard 100 */
 	{QMI_FIXED_INTF(0x0b3c, 0xc001, 4)},	/* Olivetti Olicard 120 */

From 79e09ef96b6a5fb888f5241f3aa707e9ad0b1cce Mon Sep 17 00:00:00 2001
From: Liping Zhang <zlpnobody@gmail.com>
Date: Mon, 3 Apr 2017 16:34:38 +0800
Subject: [PATCH 385/410] netfilter: nft_hash: do not dump the auto generated
 seed

This can prevent the nft utility from printing out the auto generated
seed to the user, which is unnecessary and confusing.

Fixes: cb1b69b0b15b ("netfilter: nf_tables: add hash expression")
Signed-off-by: Liping Zhang <zlpnobody@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_hash.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index eb2721af898d..c4dad1254ead 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -21,6 +21,7 @@ struct nft_hash {
 	enum nft_registers      sreg:8;
 	enum nft_registers      dreg:8;
 	u8			len;
+	bool			autogen_seed:1;
 	u32			modulus;
 	u32			seed;
 	u32			offset;
@@ -82,10 +83,12 @@ static int nft_hash_init(const struct nft_ctx *ctx,
 	if (priv->offset + priv->modulus - 1 < priv->offset)
 		return -EOVERFLOW;
 
-	if (tb[NFTA_HASH_SEED])
+	if (tb[NFTA_HASH_SEED]) {
 		priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
-	else
+	} else {
+		priv->autogen_seed = true;
 		get_random_bytes(&priv->seed, sizeof(priv->seed));
+	}
 
 	return nft_validate_register_load(priv->sreg, len) &&
 	       nft_validate_register_store(ctx, priv->dreg, NULL,
@@ -105,7 +108,8 @@ static int nft_hash_dump(struct sk_buff *skb,
 		goto nla_put_failure;
 	if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus)))
 		goto nla_put_failure;
-	if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed)))
+	if (!priv->autogen_seed &&
+	    nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed)))
 		goto nla_put_failure;
 	if (priv->offset != 0)
 		if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))

From fe50543c194e2e1aee2f3eba41fcafd187b3dbde Mon Sep 17 00:00:00 2001
From: Gao Feng <fgao@ikuai8.com>
Date: Thu, 6 Apr 2017 09:45:22 +0800
Subject: [PATCH 386/410] netfilter: ipt_CLUSTERIP: Fix wrong conntrack netns
 refcnt usage

Current codes invoke wrongly nf_ct_netns_get in the destroy routine,
it should use nf_ct_netns_put, not nf_ct_netns_get.
It could cause some modules could not be unloaded.

Fixes: ecb2421b5ddf ("netfilter: add and use nf_ct_netns_get/put")
Signed-off-by: Gao Feng <fgao@ikuai8.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 52f26459efc3..9b8841316e7b 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -461,7 +461,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
 
 	clusterip_config_put(cipinfo->config);
 
-	nf_ct_netns_get(par->net, par->family);
+	nf_ct_netns_put(par->net, par->family);
 }
 
 #ifdef CONFIG_COMPAT

From 3412386b531244f24a27c79ee003506a52a00848 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Thu, 13 Apr 2017 13:28:12 -0400
Subject: [PATCH 387/410] irq/affinity: Fix extra vecs calculation

This fixes a math error calculating the extra_vecs. The error assumed
only 1 cpu per vector, but the value needs to account for the actual
number of cpus per vector in order to get the correct remainder for
extra CPU assignment.

Fixes: 7bf8222b9bd0 ("irq/affinity: Fix CPU spread for unbalanced nodes")
Reported-by: Xiaolong Ye <xiaolong.ye@intel.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Link: http://lkml.kernel.org/r/1492104492-19943-1-git-send-email-keith.busch@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/irq/affinity.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index dc529116f7e6..d052947fe785 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -108,7 +108,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
 		vecs_to_assign = min(vecs_per_node, ncpus);
 
 		/* Account for rounding errors */
-		extra_vecs = ncpus - vecs_to_assign;
+		extra_vecs = ncpus - vecs_to_assign * (ncpus / vecs_to_assign);
 
 		for (v = 0; curvec < last_affv && v < vecs_to_assign;
 		     curvec++, v++) {

From d8a6e3aed955342dd68dd72e0feaf2cd32be683b Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Thu, 13 Apr 2017 11:06:00 -0700
Subject: [PATCH 388/410] ia64: restore symbol versions for symbols defined in
 assembly

The ia64 build generates many warnings like this:

   WARNING: EXPORT symbol "empty_zero_page" [vmlinux] version generation failed, symbol will not be versioned.

Besides adding the necessary header this also requires fiddling with
some explicit .S -> .o rules.

Cc: IA64-ML <linux-ia64@vger.kernel.org>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/include/asm/asm-prototypes.h | 29 ++++++++++++++++++++++++++
 arch/ia64/lib/Makefile                 | 16 +++++++-------
 2 files changed, 37 insertions(+), 8 deletions(-)
 create mode 100644 arch/ia64/include/asm/asm-prototypes.h

diff --git a/arch/ia64/include/asm/asm-prototypes.h b/arch/ia64/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..a2c139808cfe
--- /dev/null
+++ b/arch/ia64/include/asm/asm-prototypes.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_IA64_ASM_PROTOTYPES_H
+#define _ASM_IA64_ASM_PROTOTYPES_H
+
+#include <asm/cacheflush.h>
+#include <asm/checksum.h>
+#include <asm/esi.h>
+#include <asm/ftrace.h>
+#include <asm/page.h>
+#include <asm/pal.h>
+#include <asm/string.h>
+#include <asm/uaccess.h>
+#include <asm/unwind.h>
+#include <asm/xor.h>
+
+extern const char ia64_ivt[];
+
+signed int __divsi3(signed int, unsigned int);
+signed int __modsi3(signed int, unsigned int);
+
+signed long long __divdi3(signed long long, unsigned long long);
+signed long long __moddi3(signed long long, unsigned long long);
+
+unsigned int __udivsi3(unsigned int, unsigned int);
+unsigned int __umodsi3(unsigned int, unsigned int);
+
+unsigned long long __udivdi3(unsigned long long, unsigned long long);
+unsigned long long __umoddi3(unsigned long long, unsigned long long);
+
+#endif /* _ASM_IA64_ASM_PROTOTYPES_H */
diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile
index 1f3d3877618f..0a40b14407b1 100644
--- a/arch/ia64/lib/Makefile
+++ b/arch/ia64/lib/Makefile
@@ -24,25 +24,25 @@ AFLAGS___modsi3.o	=	     -DMODULO
 AFLAGS___umodsi3.o	= -DUNSIGNED -DMODULO
 
 $(obj)/__divdi3.o: $(src)/idiv64.S FORCE
-	$(call if_changed_dep,as_o_S)
+	$(call if_changed_rule,as_o_S)
 
 $(obj)/__udivdi3.o: $(src)/idiv64.S FORCE
-	$(call if_changed_dep,as_o_S)
+	$(call if_changed_rule,as_o_S)
 
 $(obj)/__moddi3.o: $(src)/idiv64.S FORCE
-	$(call if_changed_dep,as_o_S)
+	$(call if_changed_rule,as_o_S)
 
 $(obj)/__umoddi3.o: $(src)/idiv64.S FORCE
-	$(call if_changed_dep,as_o_S)
+	$(call if_changed_rule,as_o_S)
 
 $(obj)/__divsi3.o: $(src)/idiv32.S FORCE
-	$(call if_changed_dep,as_o_S)
+	$(call if_changed_rule,as_o_S)
 
 $(obj)/__udivsi3.o: $(src)/idiv32.S FORCE
-	$(call if_changed_dep,as_o_S)
+	$(call if_changed_rule,as_o_S)
 
 $(obj)/__modsi3.o: $(src)/idiv32.S FORCE
-	$(call if_changed_dep,as_o_S)
+	$(call if_changed_rule,as_o_S)
 
 $(obj)/__umodsi3.o: $(src)/idiv32.S FORCE
-	$(call if_changed_dep,as_o_S)
+	$(call if_changed_rule,as_o_S)

From 76e32a2a084ed71b48179023cd8fdb3787c8a6ad Mon Sep 17 00:00:00 2001
From: Vitaly Wool <vitalywool@gmail.com>
Date: Thu, 13 Apr 2017 14:56:14 -0700
Subject: [PATCH 389/410] z3fold: fix page locking in z3fold_alloc()

Stress testing of the current z3fold implementation on a 8-core system
revealed it was possible that a z3fold page deleted from its unbuddied
list in z3fold_alloc() would be put on another unbuddied list by
z3fold_free() while z3fold_alloc() is still processing it.  This has
been introduced with commit 5a27aa822 ("z3fold: add kref refcounting")
due to the removal of special handling of a z3fold page not on any list
in z3fold_free().

To fix this, the z3fold page lock should be taken in z3fold_alloc()
before the pool lock is released.  To avoid deadlocking, we just try to
lock the page as soon as we get a hold of it, and if trylock fails, we
drop this page and take the next one.

Signed-off-by: Vitaly Wool <vitalywool@gmail.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: <Oleksiy.Avramchenko@sony.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/z3fold.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index f9492bccfd79..54f63c4a809a 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -185,6 +185,12 @@ static inline void z3fold_page_lock(struct z3fold_header *zhdr)
 	spin_lock(&zhdr->page_lock);
 }
 
+/* Try to lock a z3fold page */
+static inline int z3fold_page_trylock(struct z3fold_header *zhdr)
+{
+	return spin_trylock(&zhdr->page_lock);
+}
+
 /* Unlock a z3fold page */
 static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
 {
@@ -385,7 +391,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 			spin_lock(&pool->lock);
 			zhdr = list_first_entry_or_null(&pool->unbuddied[i],
 						struct z3fold_header, buddy);
-			if (!zhdr) {
+			if (!zhdr || !z3fold_page_trylock(zhdr)) {
 				spin_unlock(&pool->lock);
 				continue;
 			}
@@ -394,7 +400,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
 			spin_unlock(&pool->lock);
 
 			page = virt_to_page(zhdr);
-			z3fold_page_lock(zhdr);
 			if (zhdr->first_chunks == 0) {
 				if (zhdr->middle_chunks != 0 &&
 				    chunks >= zhdr->start_middle)

From 0a85e51d37645e9ce57e5e1a30859e07810ed07c Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 13 Apr 2017 14:56:17 -0700
Subject: [PATCH 390/410] thp: reduce indentation level in change_huge_pmd()

Patch series "thp: fix few MADV_DONTNEED races"

For MADV_DONTNEED to work properly with huge pages, it's critical to not
clear pmd intermittently unless you hold down_write(mmap_sem).

Otherwise MADV_DONTNEED can miss the THP which can lead to userspace
breakage.

See example of such race in commit message of patch 2/4.

All these races are found by code inspection.  I haven't seen them
triggered.  I don't think it's worth to apply them to stable@.

This patch (of 4):

Restructure code in preparation for a fix.

Link: http://lkml.kernel.org/r/20170302151034.27829-2-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/huge_memory.c | 52 ++++++++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fef4cf210cc7..a513861a9037 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1724,37 +1724,37 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	spinlock_t *ptl;
-	int ret = 0;
+	pmd_t entry;
+	bool preserve_write;
+	int ret;
 
 	ptl = __pmd_trans_huge_lock(pmd, vma);
-	if (ptl) {
-		pmd_t entry;
-		bool preserve_write = prot_numa && pmd_write(*pmd);
-		ret = 1;
+	if (!ptl)
+		return 0;
 
-		/*
-		 * Avoid trapping faults against the zero page. The read-only
-		 * data is likely to be read-cached on the local CPU and
-		 * local/remote hits to the zero page are not interesting.
-		 */
-		if (prot_numa && is_huge_zero_pmd(*pmd)) {
-			spin_unlock(ptl);
-			return ret;
-		}
+	preserve_write = prot_numa && pmd_write(*pmd);
+	ret = 1;
 
-		if (!prot_numa || !pmd_protnone(*pmd)) {
-			entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
-			entry = pmd_modify(entry, newprot);
-			if (preserve_write)
-				entry = pmd_mk_savedwrite(entry);
-			ret = HPAGE_PMD_NR;
-			set_pmd_at(mm, addr, pmd, entry);
-			BUG_ON(vma_is_anonymous(vma) && !preserve_write &&
-					pmd_write(entry));
-		}
-		spin_unlock(ptl);
-	}
+	/*
+	 * Avoid trapping faults against the zero page. The read-only
+	 * data is likely to be read-cached on the local CPU and
+	 * local/remote hits to the zero page are not interesting.
+	 */
+	if (prot_numa && is_huge_zero_pmd(*pmd))
+		goto unlock;
 
+	if (prot_numa && pmd_protnone(*pmd))
+		goto unlock;
+
+	entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
+	entry = pmd_modify(entry, newprot);
+	if (preserve_write)
+		entry = pmd_mk_savedwrite(entry);
+	ret = HPAGE_PMD_NR;
+	set_pmd_at(mm, addr, pmd, entry);
+	BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry));
+unlock:
+	spin_unlock(ptl);
 	return ret;
 }
 

From ced108037c2aa542b3ed8b7afd1576064ad1362a Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 13 Apr 2017 14:56:20 -0700
Subject: [PATCH 391/410] thp: fix MADV_DONTNEED vs. numa balancing race

In case prot_numa, we are under down_read(mmap_sem).  It's critical to
not clear pmd intermittently to avoid race with MADV_DONTNEED which is
also under down_read(mmap_sem):

	CPU0:				CPU1:
				change_huge_pmd(prot_numa=1)
				 pmdp_huge_get_and_clear_notify()
madvise_dontneed()
 zap_pmd_range()
  pmd_trans_huge(*pmd) == 0 (without ptl)
  // skip the pmd
				 set_pmd_at();
				 // pmd is re-established

The race makes MADV_DONTNEED miss the huge pmd and don't clear it
which may break userspace.

Found by code analysis, never saw triggered.

Link: http://lkml.kernel.org/r/20170302151034.27829-3-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/huge_memory.c | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a513861a9037..26769465af63 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1746,7 +1746,39 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	if (prot_numa && pmd_protnone(*pmd))
 		goto unlock;
 
-	entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
+	/*
+	 * In case prot_numa, we are under down_read(mmap_sem). It's critical
+	 * to not clear pmd intermittently to avoid race with MADV_DONTNEED
+	 * which is also under down_read(mmap_sem):
+	 *
+	 *	CPU0:				CPU1:
+	 *				change_huge_pmd(prot_numa=1)
+	 *				 pmdp_huge_get_and_clear_notify()
+	 * madvise_dontneed()
+	 *  zap_pmd_range()
+	 *   pmd_trans_huge(*pmd) == 0 (without ptl)
+	 *   // skip the pmd
+	 *				 set_pmd_at();
+	 *				 // pmd is re-established
+	 *
+	 * The race makes MADV_DONTNEED miss the huge pmd and don't clear it
+	 * which may break userspace.
+	 *
+	 * pmdp_invalidate() is required to make sure we don't miss
+	 * dirty/young flags set by hardware.
+	 */
+	entry = *pmd;
+	pmdp_invalidate(vma, addr, pmd);
+
+	/*
+	 * Recover dirty/young flags.  It relies on pmdp_invalidate to not
+	 * corrupt them.
+	 */
+	if (pmd_dirty(*pmd))
+		entry = pmd_mkdirty(entry);
+	if (pmd_young(*pmd))
+		entry = pmd_mkyoung(entry);
+
 	entry = pmd_modify(entry, newprot);
 	if (preserve_write)
 		entry = pmd_mk_savedwrite(entry);

From c0c379e2931b05facef538e53bf3b21f283d9a0b Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 13 Apr 2017 14:56:23 -0700
Subject: [PATCH 392/410] mm: drop unused pmdp_huge_get_and_clear_notify()

Dave noticed that after fixing MADV_DONTNEED vs numa balancing race the
last pmdp_huge_get_and_clear_notify() user is gone.

Let's drop the helper.

Link: http://lkml.kernel.org/r/20170306112047.24809-1-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmu_notifier.h | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 51891fb0d3ce..c91b3bcd158f 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -394,18 +394,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 	___pud;								\
 })
 
-#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd)		\
-({									\
-	unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;		\
-	pmd_t ___pmd;							\
-									\
-	___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd);		\
-	mmu_notifier_invalidate_range(__mm, ___haddr,			\
-				      ___haddr + HPAGE_PMD_SIZE);	\
-									\
-	___pmd;								\
-})
-
 /*
  * set_pte_at_notify() sets the pte _after_ running the notifier.
  * This is safe to start by updating the secondary MMUs, because the primary MMU
@@ -489,7 +477,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 #define	ptep_clear_flush_notify ptep_clear_flush
 #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
 #define pudp_huge_clear_flush_notify pudp_huge_clear_flush
-#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
 #define set_pte_at_notify set_pte_at
 
 #endif /* CONFIG_MMU_NOTIFIER */

From 58ceeb6bec86d9140f9d91d71a710e963523d063 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 13 Apr 2017 14:56:26 -0700
Subject: [PATCH 393/410] thp: fix MADV_DONTNEED vs. MADV_FREE race

Both MADV_DONTNEED and MADV_FREE handled with down_read(mmap_sem).

It's critical to not clear pmd intermittently while handling MADV_FREE
to avoid race with MADV_DONTNEED:

	CPU0:				CPU1:
				madvise_free_huge_pmd()
				 pmdp_huge_get_and_clear_full()
madvise_dontneed()
 zap_pmd_range()
  pmd_trans_huge(*pmd) == 0 (without ptl)
  // skip the pmd
				 set_pmd_at();
				 // pmd is re-established

It results in MADV_DONTNEED skipping the pmd, leaving it not cleared.
It violates MADV_DONTNEED interface and can result is userspace
misbehaviour.

Basically it's the same race as with numa balancing in
change_huge_pmd(), but a bit simpler to mitigate: we don't need to
preserve dirty/young flags here due to MADV_FREE functionality.

[kirill.shutemov@linux.intel.com: Urgh... Power is special again]
  Link: http://lkml.kernel.org/r/20170303102636.bhd2zhtpds4mt62a@black.fi.intel.com
Link: http://lkml.kernel.org/r/20170302151034.27829-4-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/huge_memory.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 26769465af63..f3c4f9d22821 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1568,8 +1568,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		deactivate_page(page);
 
 	if (pmd_young(orig_pmd) || pmd_dirty(orig_pmd)) {
-		orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
-			tlb->fullmm);
+		pmdp_invalidate(vma, addr, pmd);
 		orig_pmd = pmd_mkold(orig_pmd);
 		orig_pmd = pmd_mkclean(orig_pmd);
 

From 5b7abeae3af8c08c577e599dd0578b9e3ee6687b Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Thu, 13 Apr 2017 14:56:28 -0700
Subject: [PATCH 394/410] thp: fix MADV_DONTNEED vs clear soft dirty race

Yet another instance of the same race.

Fix is identical to change_huge_pmd().

See "thp: fix MADV_DONTNEED vs.  numa balancing race" for more details.

Link: http://lkml.kernel.org/r/20170302151034.27829-5-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/task_mmu.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f08bd31c1081..312578089544 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -900,7 +900,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
 static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
 		unsigned long addr, pmd_t *pmdp)
 {
-	pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
+	pmd_t pmd = *pmdp;
+
+	/* See comment in change_huge_pmd() */
+	pmdp_invalidate(vma, addr, pmdp);
+	if (pmd_dirty(*pmdp))
+		pmd = pmd_mkdirty(pmd);
+	if (pmd_young(*pmdp))
+		pmd = pmd_mkyoung(pmd);
 
 	pmd = pmd_wrprotect(pmd);
 	pmd = pmd_clear_soft_dirty(pmd);

From 045c7a3f53d9403b62d396b6d051c4be5044cdb4 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Thu, 13 Apr 2017 14:56:32 -0700
Subject: [PATCH 395/410] hugetlbfs: fix offset overflow in hugetlbfs mmap

If mmap() maps a file, it can be passed an offset into the file at which
the mapping is to start.  Offset could be a negative value when
represented as a loff_t.  The offset plus length will be used to update
the file size (i_size) which is also a loff_t.

Validate the value of offset and offset + length to make sure they do
not overflow and appear as negative.

Found by syzcaller with commit ff8c0c53c475 ("mm/hugetlb.c: don't call
region_abort if region_chg fails") applied.  Prior to this commit, the
overflow would still occur but we would luckily return ENOMEM.

To reproduce:

   mmap(0, 0x2000, 0, 0x40021, 0xffffffffffffffffULL, 0x8000000000000000ULL);

Resulted in,

  kernel BUG at mm/hugetlb.c:742!
  Call Trace:
   hugetlbfs_evict_inode+0x80/0xa0
   evict+0x24a/0x620
   iput+0x48f/0x8c0
   dentry_unlink_inode+0x31f/0x4d0
   __dentry_kill+0x292/0x5e0
   dput+0x730/0x830
   __fput+0x438/0x720
   ____fput+0x1a/0x20
   task_work_run+0xfe/0x180
   exit_to_usermode_loop+0x133/0x150
   syscall_return_slowpath+0x184/0x1c0
   entry_SYSCALL_64_fastpath+0xab/0xad

Fixes: ff8c0c53c475 ("mm/hugetlb.c: don't call region_abort if region_chg fails")
Link: http://lkml.kernel.org/r/1491951118-30678-1-git-send-email-mike.kravetz@oracle.com
Reported-by: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 7163fe014b57..dde861387a40 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -136,17 +136,26 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
 	vma->vm_ops = &hugetlb_vm_ops;
 
+	/*
+	 * Offset passed to mmap (before page shift) could have been
+	 * negative when represented as a (l)off_t.
+	 */
+	if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0)
+		return -EINVAL;
+
 	if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
 		return -EINVAL;
 
 	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
+	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+	/* check for overflow */
+	if (len < vma_len)
+		return -EINVAL;
 
 	inode_lock(inode);
 	file_accessed(file);
 
 	ret = -ENOMEM;
-	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
-
 	if (hugetlb_reserve_pages(inode,
 				vma->vm_pgoff >> huge_page_order(h),
 				len >> huge_page_shift(h), vma,
@@ -155,7 +164,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 
 	ret = 0;
 	if (vma->vm_flags & VM_WRITE && inode->i_size < len)
-		inode->i_size = len;
+		i_size_write(inode, len);
 out:
 	inode_unlock(inode);
 

From 4ca82dabc9fbf7bc5322aa54d802cb3cb7b125c5 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 13 Apr 2017 14:56:35 -0700
Subject: [PATCH 396/410] zram: fix operator precedence to get offset

In zram_rw_page, the logic to get offset is wrong by operator precedence
(i.e., "<<" is higher than "&").  With wrong offset, zram can corrupt
the user's data.  This patch fixes it.

Fixes: 8c7f01025 ("zram: implement rw_page operation of zram")
Link: http://lkml.kernel.org/r/1492042622-12074-1-git-send-email-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/block/zram/zram_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index dceb5edd1e54..03a7408e090d 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -928,7 +928,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
 	}
 
 	index = sector >> SECTORS_PER_PAGE_SHIFT;
-	offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT;
+	offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
 
 	bv.bv_page = page;
 	bv.bv_len = PAGE_SIZE;

From d72e9a7a93e4f8e9e52491921d99e0c8aa89eb4e Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 13 Apr 2017 14:56:37 -0700
Subject: [PATCH 397/410] zram: do not use copy_page with non-page aligned
 address

The copy_page is optimized memcpy for page-alinged address.  If it is
used with non-page aligned address, it can corrupt memory which means
system corruption.  With zram, it can happen with

1. 64K architecture
2. partial IO
3. slub debug

Partial IO need to allocate a page and zram allocates it via kmalloc.
With slub debug, kmalloc(PAGE_SIZE) doesn't return page-size aligned
address.  And finally, copy_page(mem, cmem) corrupts memory.

So, this patch changes it to memcpy.

Actuaully, we don't need to change zram_bvec_write part because zsmalloc
returns page-aligned address in case of PAGE_SIZE class but it's not
good to rely on the internal of zsmalloc.

Note:
 When this patch is merged to stable, clear_page should be fixed, too.
 Unfortunately, recent zram removes it by "same page merge" feature so
 it's hard to backport this patch to -stable tree.

I will handle it when I receive the mail from stable tree maintainer to
merge this patch to backport.

Fixes: 42e99bd ("zram: optimize memory operations with clear_page()/copy_page()")
Link: http://lkml.kernel.org/r/1492042622-12074-2-git-send-email-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/block/zram/zram_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 03a7408e090d..0c09d4256108 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -523,7 +523,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
 
 	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
 	if (size == PAGE_SIZE) {
-		copy_page(mem, cmem);
+		memcpy(mem, cmem, PAGE_SIZE);
 	} else {
 		struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
 
@@ -717,7 +717,7 @@ compress_again:
 
 	if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
 		src = kmap_atomic(page);
-		copy_page(cmem, src);
+		memcpy(cmem, src, PAGE_SIZE);
 		kunmap_atomic(src);
 	} else {
 		memcpy(cmem, src, clen);

From 85d492f28d056c40629fc25d79f54da618a29dc4 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 13 Apr 2017 14:56:40 -0700
Subject: [PATCH 398/410] zsmalloc: expand class bit

Now 64K page system, zsamlloc has 257 classes so 8 class bit is not
enough.  With that, it corrupts the system when zsmalloc stores
65536byte data(ie, index number 256) so that this patch increases class
bit for simple fix for stable backport.  We should clean up this mess
soon.

  index	size
  0	32
  1	288
  ..
  ..
  204	52256
  256	65536

Fixes: 3783689a1 ("zsmalloc: introduce zspage structure")
Link: http://lkml.kernel.org/r/1492042622-12074-3-git-send-email-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/zsmalloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index b7ee9c34dbd6..d41edd28298b 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -276,7 +276,7 @@ struct zs_pool {
 struct zspage {
 	struct {
 		unsigned int fullness:FULLNESS_BITS;
-		unsigned int class:CLASS_BITS;
+		unsigned int class:CLASS_BITS + 1;
 		unsigned int isolated:ISOLATED_BITS;
 		unsigned int magic:MAGIC_VAL_BITS;
 	};

From 5714320d316ae6e830bfc463bfdaf288695f6b62 Mon Sep 17 00:00:00 2001
From: Martin Kepplinger <martin.kepplinger@ginzinger.com>
Date: Thu, 13 Apr 2017 14:56:43 -0700
Subject: [PATCH 399/410] mailmap: add Martin Kepplinger's email

Set the partly deprecated companies' email addresses as alias for the
personal one.

Link: http://lkml.kernel.org/r/1491984622-17321-1-git-send-email-martin.kepplinger@ginzinger.com
Signed-off-by: Martin Kepplinger <martin.kepplinger@ginzinger.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .mailmap | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.mailmap b/.mailmap
index e229922dc7f0..1d6f4e7280dc 100644
--- a/.mailmap
+++ b/.mailmap
@@ -99,6 +99,8 @@ Linas Vepstas <linas@austin.ibm.com>
 Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
 Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
 Mark Brown <broonie@sirena.org.uk>
+Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
+Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
 Matthieu CASTET <castet.matthieu@free.fr>
 Mauro Carvalho Chehab <mchehab@kernel.org> <mchehab@brturbo.com.br>
 Mauro Carvalho Chehab <mchehab@kernel.org> <maurochehab@gmail.com>

From f2200ac311302fcdca6556fd0c5127eab6c65a3e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 11 Apr 2017 10:10:28 +0200
Subject: [PATCH 400/410] perf/x86: Avoid exposing wrong/stale data in
 intel_pmu_lbr_read_32()

When the perf_branch_entry::{in_tx,abort,cycles} fields were added,
intel_pmu_lbr_read_32() wasn't updated to initialize them.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Cc: <stable@vger.kernel.org>
Fixes: 135c5612c460 ("perf/x86/intel: Support Haswell/v4 LBR format")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/lbr.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 81b321ace8e0..f924629836a8 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -507,6 +507,9 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 		cpuc->lbr_entries[i].to		= msr_lastbranch.to;
 		cpuc->lbr_entries[i].mispred	= 0;
 		cpuc->lbr_entries[i].predicted	= 0;
+		cpuc->lbr_entries[i].in_tx	= 0;
+		cpuc->lbr_entries[i].abort	= 0;
+		cpuc->lbr_entries[i].cycles	= 0;
 		cpuc->lbr_entries[i].reserved	= 0;
 	}
 	cpuc->lbr_stack.nr = i;

From 75eb5e1e7b4edbc8e8f930de59004d21cb46961f Mon Sep 17 00:00:00 2001
From: Tyler Baker <tyler.baker@linaro.org>
Date: Thu, 13 Apr 2017 15:27:31 -0700
Subject: [PATCH 401/410] irqchip/irq-imx-gpcv2: Fix spinlock initialization

The raw_spinlock in the IMX GPCV2 interupt chip is not initialized before
usage. That results in a lockdep splat:

  INFO: trying to register non-static key.
  the code is fine but needs lockdep annotation.
  turning off the locking correctness validator.

Add the missing raw_spin_lock_init() to the setup code.

Fixes: e324c4dc4a59 ("irqchip/imx-gpcv2: IMX GPCv2 driver for wakeup sources")
Signed-off-by: Tyler Baker <tyler.baker@linaro.org>
Reviewed-by: Fabio Estevam <fabio.estevam@nxp.com>
Cc: jason@lakedaemon.net
Cc: marc.zyngier@arm.com
Cc: shawnguo@kernel.org
Cc: andrew.smirnov@gmail.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20170413222731.5917-1-tyler.baker@linaro.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/irqchip/irq-imx-gpcv2.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/irqchip/irq-imx-gpcv2.c b/drivers/irqchip/irq-imx-gpcv2.c
index 15af9a9753e5..2d203b422129 100644
--- a/drivers/irqchip/irq-imx-gpcv2.c
+++ b/drivers/irqchip/irq-imx-gpcv2.c
@@ -230,6 +230,8 @@ static int __init imx_gpcv2_irqchip_init(struct device_node *node,
 		return -ENOMEM;
 	}
 
+	raw_spin_lock_init(&cd->rlock);
+
 	cd->gpc_base = of_iomap(node, 0);
 	if (!cd->gpc_base) {
 		pr_err("fsl-gpcv2: unable to map gpc registers\n");

From a8983d01f9b7d600fbdb3916899edf395954cc83 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 14 Apr 2017 10:57:52 +0200
Subject: [PATCH 402/410] Revert "tty: don't panic on OOM in tty_set_ldisc()"

This reverts commit 5362544bebe85071188dd9e479b5a5040841c895 as it is
reported to cause a reproducable crash.

Fixes: 5362544bebe8 ("tty: don't panic on OOM in tty_set_ldisc()")
Reported-by: Vegard Nossum <vegard.nossum@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: <syzkaller@googlegroups.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: One Thousand Gnomes <gnomes@lxorguk.ukuu.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
---
 drivers/tty/tty_ldisc.c | 85 +++++++++++++++++++++++++++++++++--------
 1 file changed, 69 insertions(+), 16 deletions(-)

diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index b0500a0a87b8..e4603b09863a 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -491,6 +491,41 @@ static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld)
 	tty_ldisc_debug(tty, "%p: closed\n", ld);
 }
 
+/**
+ *	tty_ldisc_restore	-	helper for tty ldisc change
+ *	@tty: tty to recover
+ *	@old: previous ldisc
+ *
+ *	Restore the previous line discipline or N_TTY when a line discipline
+ *	change fails due to an open error
+ */
+
+static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
+{
+	struct tty_ldisc *new_ldisc;
+	int r;
+
+	/* There is an outstanding reference here so this is safe */
+	old = tty_ldisc_get(tty, old->ops->num);
+	WARN_ON(IS_ERR(old));
+	tty->ldisc = old;
+	tty_set_termios_ldisc(tty, old->ops->num);
+	if (tty_ldisc_open(tty, old) < 0) {
+		tty_ldisc_put(old);
+		/* This driver is always present */
+		new_ldisc = tty_ldisc_get(tty, N_TTY);
+		if (IS_ERR(new_ldisc))
+			panic("n_tty: get");
+		tty->ldisc = new_ldisc;
+		tty_set_termios_ldisc(tty, N_TTY);
+		r = tty_ldisc_open(tty, new_ldisc);
+		if (r < 0)
+			panic("Couldn't open N_TTY ldisc for "
+			      "%s --- error %d.",
+			      tty_name(tty), r);
+	}
+}
+
 /**
  *	tty_set_ldisc		-	set line discipline
  *	@tty: the terminal to set
@@ -504,7 +539,12 @@ static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld)
 
 int tty_set_ldisc(struct tty_struct *tty, int disc)
 {
-	int retval, old_disc;
+	int retval;
+	struct tty_ldisc *old_ldisc, *new_ldisc;
+
+	new_ldisc = tty_ldisc_get(tty, disc);
+	if (IS_ERR(new_ldisc))
+		return PTR_ERR(new_ldisc);
 
 	tty_lock(tty);
 	retval = tty_ldisc_lock(tty, 5 * HZ);
@@ -517,8 +557,7 @@ int tty_set_ldisc(struct tty_struct *tty, int disc)
 	}
 
 	/* Check the no-op case */
-	old_disc = tty->ldisc->ops->num;
-	if (old_disc == disc)
+	if (tty->ldisc->ops->num == disc)
 		goto out;
 
 	if (test_bit(TTY_HUPPED, &tty->flags)) {
@@ -527,25 +566,34 @@ int tty_set_ldisc(struct tty_struct *tty, int disc)
 		goto out;
 	}
 
-	retval = tty_ldisc_reinit(tty, disc);
+	old_ldisc = tty->ldisc;
+
+	/* Shutdown the old discipline. */
+	tty_ldisc_close(tty, old_ldisc);
+
+	/* Now set up the new line discipline. */
+	tty->ldisc = new_ldisc;
+	tty_set_termios_ldisc(tty, disc);
+
+	retval = tty_ldisc_open(tty, new_ldisc);
 	if (retval < 0) {
 		/* Back to the old one or N_TTY if we can't */
-		if (tty_ldisc_reinit(tty, old_disc) < 0) {
-			pr_err("tty: TIOCSETD failed, reinitializing N_TTY\n");
-			if (tty_ldisc_reinit(tty, N_TTY) < 0) {
-				/* At this point we have tty->ldisc == NULL. */
-				pr_err("tty: reinitializing N_TTY failed\n");
-			}
-		}
+		tty_ldisc_put(new_ldisc);
+		tty_ldisc_restore(tty, old_ldisc);
 	}
 
-	if (tty->ldisc && tty->ldisc->ops->num != old_disc &&
-	    tty->ops->set_ldisc) {
+	if (tty->ldisc->ops->num != old_ldisc->ops->num && tty->ops->set_ldisc) {
 		down_read(&tty->termios_rwsem);
 		tty->ops->set_ldisc(tty);
 		up_read(&tty->termios_rwsem);
 	}
 
+	/* At this point we hold a reference to the new ldisc and a
+	   reference to the old ldisc, or we hold two references to
+	   the old ldisc (if it was restored as part of error cleanup
+	   above). In either case, releasing a single reference from
+	   the old ldisc is correct. */
+	new_ldisc = old_ldisc;
 out:
 	tty_ldisc_unlock(tty);
 
@@ -553,6 +601,7 @@ out:
 	   already running */
 	tty_buffer_restart_work(tty->port);
 err:
+	tty_ldisc_put(new_ldisc);	/* drop the extra reference */
 	tty_unlock(tty);
 	return retval;
 }
@@ -613,8 +662,10 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc)
 	int retval;
 
 	ld = tty_ldisc_get(tty, disc);
-	if (IS_ERR(ld))
+	if (IS_ERR(ld)) {
+		BUG_ON(disc == N_TTY);
 		return PTR_ERR(ld);
+	}
 
 	if (tty->ldisc) {
 		tty_ldisc_close(tty, tty->ldisc);
@@ -626,8 +677,10 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc)
 	tty_set_termios_ldisc(tty, disc);
 	retval = tty_ldisc_open(tty, tty->ldisc);
 	if (retval) {
-		tty_ldisc_put(tty->ldisc);
-		tty->ldisc = NULL;
+		if (!WARN_ON(disc == N_TTY)) {
+			tty_ldisc_put(tty->ldisc);
+			tty->ldisc = NULL;
+		}
 	}
 	return retval;
 }

From e68d490def9f7d726eea6dc62ebd692af64ec8b5 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Fri, 14 Apr 2017 13:38:32 +0200
Subject: [PATCH 403/410] fbdev: omapfb: delete check_required_callbacks()

Commit 561eb9d09a93 ("fbdev: omap/lcd: Make callbacks optional") made
panel callbacks optional but forgot to update check_required_callbacks().
As a result many (all?) OMAP systems using omapfb will crash at boot.
Fix by deleting the whole function.

Fixes: 561eb9d09a93 ("fbdev: omap/lcd: Make callbacks optional")
Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/omap/omapfb_main.c | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c
index 1abba07b84b3..f4cbfb3b8a09 100644
--- a/drivers/video/fbdev/omap/omapfb_main.c
+++ b/drivers/video/fbdev/omap/omapfb_main.c
@@ -1608,19 +1608,6 @@ static int omapfb_find_ctrl(struct omapfb_device *fbdev)
 	return 0;
 }
 
-static void check_required_callbacks(struct omapfb_device *fbdev)
-{
-#define _C(x) (fbdev->ctrl->x != NULL)
-#define _P(x) (fbdev->panel->x != NULL)
-	BUG_ON(fbdev->ctrl == NULL || fbdev->panel == NULL);
-	BUG_ON(!(_C(init) && _C(cleanup) && _C(get_caps) &&
-		 _C(set_update_mode) && _C(setup_plane) && _C(enable_plane) &&
-		 _P(init) && _P(cleanup) && _P(enable) && _P(disable) &&
-		 _P(get_caps)));
-#undef _P
-#undef _C
-}
-
 /*
  * Called by LDM binding to probe and attach a new device.
  * Initialization sequence:
@@ -1705,8 +1692,6 @@ static int omapfb_do_probe(struct platform_device *pdev,
 		omapfb_ops.fb_mmap = omapfb_mmap;
 	init_state++;
 
-	check_required_callbacks(fbdev);
-
 	r = planes_init(fbdev);
 	if (r)
 		goto cleanup;

From 5a8d75a1b8c99bdc926ba69b7b7dbe4fae81a5af Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 14 Apr 2017 13:58:29 -0600
Subject: [PATCH 404/410] block: fix bio_will_gap() for first bvec with offset

Commit 729204ef49ec("block: relax check on sg gap") allows us to merge
bios, if both are physically contiguous.  This change can merge a huge
number of small bios, through mkfs for example, mkfs.ntfs running time
can be decreased to ~1/10.

But if one rq starts with a non-aligned buffer (the 1st bvec's bv_offset
is non-zero) and if we allow the merge, it is quite difficult to respect
sg gap limit, especially the max segment size, or we risk having an
unaligned virtual boundary.  This patch tries to avoid the issue by
disallowing a merge, if the req starts with an unaligned buffer.

Also add comments to explain why the merged segment can't end in
unaligned virt boundary.

Fixes: 729204ef49ec ("block: relax check on sg gap")
Tested-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>

Rewrote parts of the commit message and comments.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7548f332121a..01a696b0a4d3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1672,12 +1672,36 @@ static inline bool bios_segs_mergeable(struct request_queue *q,
 	return true;
 }
 
-static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
-			 struct bio *next)
+static inline bool bio_will_gap(struct request_queue *q,
+				struct request *prev_rq,
+				struct bio *prev,
+				struct bio *next)
 {
 	if (bio_has_data(prev) && queue_virt_boundary(q)) {
 		struct bio_vec pb, nb;
 
+		/*
+		 * don't merge if the 1st bio starts with non-zero
+		 * offset, otherwise it is quite difficult to respect
+		 * sg gap limit. We work hard to merge a huge number of small
+		 * single bios in case of mkfs.
+		 */
+		if (prev_rq)
+			bio_get_first_bvec(prev_rq->bio, &pb);
+		else
+			bio_get_first_bvec(prev, &pb);
+		if (pb.bv_offset)
+			return true;
+
+		/*
+		 * We don't need to worry about the situation that the
+		 * merged segment ends in unaligned virt boundary:
+		 *
+		 * - if 'pb' ends aligned, the merged segment ends aligned
+		 * - if 'pb' ends unaligned, the next bio must include
+		 *   one single bvec of 'nb', otherwise the 'nb' can't
+		 *   merge with 'pb'
+		 */
 		bio_get_last_bvec(prev, &pb);
 		bio_get_first_bvec(next, &nb);
 
@@ -1690,12 +1714,12 @@ static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
 
 static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
 {
-	return bio_will_gap(req->q, req->biotail, bio);
+	return bio_will_gap(req->q, req, req->biotail, bio);
 }
 
 static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
 {
-	return bio_will_gap(req->q, bio, req->bio);
+	return bio_will_gap(req->q, NULL, bio, req->bio);
 }
 
 int kblockd_schedule_work(struct work_struct *work);

From 82cc4fc2e70ec5baeff8f776f2773abc8b2cc0ae Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Fri, 14 Apr 2017 17:45:45 -0400
Subject: [PATCH 405/410] ftrace: Fix removing of second function probe

When two function probes are added to set_ftrace_filter, and then one of
them is removed, the update to the function locations is not performed, and
the record keeping of the function states are corrupted, and causes an
ftrace_bug() to occur.

This is easily reproducable by adding two probes, removing one, and then
adding it back again.

 # cd /sys/kernel/debug/tracing
 # echo schedule:traceoff > set_ftrace_filter
 # echo do_IRQ:traceoff > set_ftrace_filter
 # echo \!do_IRQ:traceoff > /debug/tracing/set_ftrace_filter
 # echo do_IRQ:traceoff > set_ftrace_filter

Causes:
 ------------[ cut here ]------------
 WARNING: CPU: 2 PID: 1098 at kernel/trace/ftrace.c:2369 ftrace_get_addr_curr+0x143/0x220
 Modules linked in: [...]
 CPU: 2 PID: 1098 Comm: bash Not tainted 4.10.0-test+ #405
 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012
 Call Trace:
  dump_stack+0x68/0x9f
  __warn+0x111/0x130
  ? trace_irq_work_interrupt+0xa0/0xa0
  warn_slowpath_null+0x1d/0x20
  ftrace_get_addr_curr+0x143/0x220
  ? __fentry__+0x10/0x10
  ftrace_replace_code+0xe3/0x4f0
  ? ftrace_int3_handler+0x90/0x90
  ? printk+0x99/0xb5
  ? 0xffffffff81000000
  ftrace_modify_all_code+0x97/0x110
  arch_ftrace_update_code+0x10/0x20
  ftrace_run_update_code+0x1c/0x60
  ftrace_run_modify_code.isra.48.constprop.62+0x8e/0xd0
  register_ftrace_function_probe+0x4b6/0x590
  ? ftrace_startup+0x310/0x310
  ? debug_lockdep_rcu_enabled.part.4+0x1a/0x30
  ? update_stack_state+0x88/0x110
  ? ftrace_regex_write.isra.43.part.44+0x1d3/0x320
  ? preempt_count_sub+0x18/0xd0
  ? mutex_lock_nested+0x104/0x800
  ? ftrace_regex_write.isra.43.part.44+0x1d3/0x320
  ? __unwind_start+0x1c0/0x1c0
  ? _mutex_lock_nest_lock+0x800/0x800
  ftrace_trace_probe_callback.isra.3+0xc0/0x130
  ? func_set_flag+0xe0/0xe0
  ? __lock_acquire+0x642/0x1790
  ? __might_fault+0x1e/0x20
  ? trace_get_user+0x398/0x470
  ? strcmp+0x35/0x60
  ftrace_trace_onoff_callback+0x48/0x70
  ftrace_regex_write.isra.43.part.44+0x251/0x320
  ? match_records+0x420/0x420
  ftrace_filter_write+0x2b/0x30
  __vfs_write+0xd7/0x330
  ? do_loop_readv_writev+0x120/0x120
  ? locks_remove_posix+0x90/0x2f0
  ? do_lock_file_wait+0x160/0x160
  ? __lock_is_held+0x93/0x100
  ? rcu_read_lock_sched_held+0x5c/0xb0
  ? preempt_count_sub+0x18/0xd0
  ? __sb_start_write+0x10a/0x230
  ? vfs_write+0x222/0x240
  vfs_write+0xef/0x240
  SyS_write+0xab/0x130
  ? SyS_read+0x130/0x130
  ? trace_hardirqs_on_caller+0x182/0x280
  ? trace_hardirqs_on_thunk+0x1a/0x1c
  entry_SYSCALL_64_fastpath+0x18/0xad
 RIP: 0033:0x7fe61c157c30
 RSP: 002b:00007ffe87890258 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
 RAX: ffffffffffffffda RBX: ffffffff8114a410 RCX: 00007fe61c157c30
 RDX: 0000000000000010 RSI: 000055814798f5e0 RDI: 0000000000000001
 RBP: ffff8800c9027f98 R08: 00007fe61c422740 R09: 00007fe61ca53700
 R10: 0000000000000073 R11: 0000000000000246 R12: 0000558147a36400
 R13: 00007ffe8788f160 R14: 0000000000000024 R15: 00007ffe8788f15c
  ? trace_hardirqs_off_caller+0xc0/0x110
 ---[ end trace 99fa09b3d9869c2c ]---
 Bad trampoline accounting at: ffffffff81cc3b00 (do_IRQ+0x0/0x150)

Cc: stable@vger.kernel.org
Fixes: 59df055f1991 ("ftrace: trace different functions with a different tracer")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b9691ee8f6c1..27bb2e61276e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3755,23 +3755,24 @@ static void __enable_ftrace_function_probe(struct ftrace_ops_hash *old_hash)
 	ftrace_probe_registered = 1;
 }
 
-static void __disable_ftrace_function_probe(void)
+static bool __disable_ftrace_function_probe(void)
 {
 	int i;
 
 	if (!ftrace_probe_registered)
-		return;
+		return false;
 
 	for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
 		struct hlist_head *hhd = &ftrace_func_hash[i];
 		if (hhd->first)
-			return;
+			return false;
 	}
 
 	/* no more funcs left */
 	ftrace_shutdown(&trace_probe_ops, 0);
 
 	ftrace_probe_registered = 0;
+	return true;
 }
 
 
@@ -3901,6 +3902,7 @@ static void
 __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 				  void *data, int flags)
 {
+	struct ftrace_ops_hash old_hash_ops;
 	struct ftrace_func_entry *rec_entry;
 	struct ftrace_func_probe *entry;
 	struct ftrace_func_probe *p;
@@ -3912,6 +3914,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	struct hlist_node *tmp;
 	char str[KSYM_SYMBOL_LEN];
 	int i, ret;
+	bool disabled;
 
 	if (glob && (strcmp(glob, "*") == 0 || !strlen(glob)))
 		func_g.search = NULL;
@@ -3930,6 +3933,10 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 
 	mutex_lock(&trace_probe_ops.func_hash->regex_lock);
 
+	old_hash_ops.filter_hash = old_hash;
+	/* Probes only have filters */
+	old_hash_ops.notrace_hash = NULL;
+
 	hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
 	if (!hash)
 		/* Hmm, should report this somehow */
@@ -3967,12 +3974,17 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 		}
 	}
 	mutex_lock(&ftrace_lock);
-	__disable_ftrace_function_probe();
+	disabled = __disable_ftrace_function_probe();
 	/*
 	 * Remove after the disable is called. Otherwise, if the last
 	 * probe is removed, a null hash means *all enabled*.
 	 */
 	ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
+
+	/* still need to update the function call sites */
+	if (ftrace_enabled && !disabled)
+		ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS,
+				       &old_hash_ops);
 	synchronize_sched();
 	if (!ret)
 		free_ftrace_hash_rcu(old_hash);

From 409c1b250e30ad0e48b4d15d7319b4e18c046c4f Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 14 Apr 2017 14:15:20 -0400
Subject: [PATCH 406/410] parisc: fix bugs in pa_memcpy

The patch 554bfeceb8a22d448cd986fc9efce25e833278a1 ("parisc: Fix access
fault handling in pa_memcpy()") reimplements the pa_memcpy function.
Unfortunatelly, it makes the kernel unbootable. The crash happens in the
function ide_complete_cmd where memcpy is called with the same source
and destination address.

This patch fixes a few bugs in pa_memcpy:

* When jumping to .Lcopy_loop_16 for the first time, don't skip the
  instruction "ldi 31,t0" (this bug made the kernel unbootable)
* Use the COND macro when comparing length, so that the comparison is
  64-bit (a theoretical issue, in case the length is greater than
  0xffffffff)
* Don't use the COND macro after the "extru" instruction (the PA-RISC
  specification says that the upper 32-bits of extru result are undefined,
  although they are set to zero in practice)
* Fix exception addresses in .Lcopy16_fault and .Lcopy8_fault
* Rename .Lcopy_loop_4 to .Lcopy_loop_8 (so that it is consistent with
  .Lcopy8_fault)

Cc: <stable@vger.kernel.org> # v4.9+
Fixes: 554bfeceb8a2 ("parisc: Fix access fault handling in pa_memcpy()")
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/lib/lusercopy.S | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S
index f01188c044ee..85c28bb80fb7 100644
--- a/arch/parisc/lib/lusercopy.S
+++ b/arch/parisc/lib/lusercopy.S
@@ -201,7 +201,7 @@ ENTRY_CFI(pa_memcpy)
 	add	dst,len,end
 
 	/* short copy with less than 16 bytes? */
-	cmpib,>>=,n 15,len,.Lbyte_loop
+	cmpib,COND(>>=),n 15,len,.Lbyte_loop
 
 	/* same alignment? */
 	xor	src,dst,t0
@@ -216,7 +216,7 @@ ENTRY_CFI(pa_memcpy)
 	/* loop until we are 64-bit aligned */
 .Lalign_loop64:
 	extru	dst,31,3,t1
-	cmpib,=,n	0,t1,.Lcopy_loop_16
+	cmpib,=,n	0,t1,.Lcopy_loop_16_start
 20:	ldb,ma	1(srcspc,src),t1
 21:	stb,ma	t1,1(dstspc,dst)
 	b	.Lalign_loop64
@@ -225,6 +225,7 @@ ENTRY_CFI(pa_memcpy)
 	ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
 	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
 
+.Lcopy_loop_16_start:
 	ldi	31,t0
 .Lcopy_loop_16:
 	cmpb,COND(>>=),n t0,len,.Lword_loop
@@ -267,7 +268,7 @@ ENTRY_CFI(pa_memcpy)
 	/* loop until we are 32-bit aligned */
 .Lalign_loop32:
 	extru	dst,31,2,t1
-	cmpib,=,n	0,t1,.Lcopy_loop_4
+	cmpib,=,n	0,t1,.Lcopy_loop_8
 20:	ldb,ma	1(srcspc,src),t1
 21:	stb,ma	t1,1(dstspc,dst)
 	b	.Lalign_loop32
@@ -277,7 +278,7 @@ ENTRY_CFI(pa_memcpy)
 	ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
 
 
-.Lcopy_loop_4:
+.Lcopy_loop_8:
 	cmpib,COND(>>=),n 15,len,.Lbyte_loop
 
 10:	ldw	0(srcspc,src),t1
@@ -299,7 +300,7 @@ ENTRY_CFI(pa_memcpy)
 	ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
 	ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
 
-	b	.Lcopy_loop_4
+	b	.Lcopy_loop_8
 	ldo	-16(len),len
 
 .Lbyte_loop:
@@ -324,7 +325,7 @@ ENTRY_CFI(pa_memcpy)
 .Lunaligned_copy:
 	/* align until dst is 32bit-word-aligned */
 	extru	dst,31,2,t1
-	cmpib,COND(=),n	0,t1,.Lcopy_dstaligned
+	cmpib,=,n	0,t1,.Lcopy_dstaligned
 20:	ldb	0(srcspc,src),t1
 	ldo	1(src),src
 21:	stb,ma	t1,1(dstspc,dst)
@@ -362,7 +363,7 @@ ENTRY_CFI(pa_memcpy)
 	cmpiclr,<> 1,t0,%r0
 	b,n .Lcase1
 .Lcase0:
-	cmpb,= %r0,len,.Lcda_finish
+	cmpb,COND(=) %r0,len,.Lcda_finish
 	nop
 
 1:	ldw,ma 4(srcspc,src), a3
@@ -376,7 +377,7 @@ ENTRY_CFI(pa_memcpy)
 1:	ldw,ma 4(srcspc,src), a3
 	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
 	ldo -1(len),len
-	cmpb,=,n %r0,len,.Ldo0
+	cmpb,COND(=),n %r0,len,.Ldo0
 .Ldo4:
 1:	ldw,ma 4(srcspc,src), a0
 	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
@@ -402,7 +403,7 @@ ENTRY_CFI(pa_memcpy)
 1:	stw,ma t0, 4(dstspc,dst)
 	ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
 	ldo -4(len),len
-	cmpb,<> %r0,len,.Ldo4
+	cmpb,COND(<>) %r0,len,.Ldo4
 	nop
 .Ldo0:
 	shrpw a2, a3, %sar, t0
@@ -436,14 +437,14 @@ ENTRY_CFI(pa_memcpy)
 	/* fault exception fixup handlers: */
 #ifdef CONFIG_64BIT
 .Lcopy16_fault:
-10:	b	.Lcopy_done
-	std,ma	t1,8(dstspc,dst)
+	b	.Lcopy_done
+10:	std,ma	t1,8(dstspc,dst)
 	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
 #endif
 
 .Lcopy8_fault:
-10:	b	.Lcopy_done
-	stw,ma	t1,4(dstspc,dst)
+	b	.Lcopy_done
+10:	stw,ma	t1,4(dstspc,dst)
 	ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
 
 	.exit

From c0eb027e5aef70b71e5a38ee3e264dc0b497f343 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 2 Apr 2017 17:10:08 -0700
Subject: [PATCH 407/410] vfs: don't do RCU lookup of empty pathnames

Normal pathname lookup doesn't allow empty pathnames, but using
AT_EMPTY_PATH (with name_to_handle_at() or fstatat(), for example) you
can trigger an empty pathname lookup.

And not only is the RCU lookup in that case entirely unnecessary
(because we'll obviously immediately finalize the end result), it is
actively wrong.

Why? An empth path is a special case that will return the original
'dirfd' dentry - and that dentry may not actually be RCU-free'd,
resulting in a potential use-after-free if we were to initialize the
path lazily under the RCU read lock and depend on complete_walk()
finalizing the dentry.

Found by syzkaller and KASAN.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Reported-by: Vegard Nossum <vegard.nossum@gmail.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namei.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/namei.c b/fs/namei.c
index d41fab78798b..19dcf62133cc 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2145,6 +2145,9 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
 	int retval = 0;
 	const char *s = nd->name->name;
 
+	if (!*s)
+		flags &= ~LOOKUP_RCU;
+
 	nd->last_type = LAST_ROOT; /* if there are only slashes... */
 	nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
 	nd->depth = 0;

From 1ec1688c5360e14dde4094d6acbf7516bf6db37e Mon Sep 17 00:00:00 2001
From: Martin Brandenburg <martin@omnibond.com>
Date: Fri, 14 Apr 2017 14:22:41 -0400
Subject: [PATCH 408/410] orangefs: free superblock when mount fails

Otherwise lockdep says:

[ 1337.483798] ================================================
[ 1337.483999] [ BUG: lock held when returning to user space! ]
[ 1337.484252] 4.11.0-rc6 #19 Not tainted
[ 1337.484423] ------------------------------------------------
[ 1337.484626] mount/14766 is leaving the kernel with locks still held!
[ 1337.484841] 1 lock held by mount/14766:
[ 1337.485017]  #0:  (&type->s_umount_key#33/1){+.+.+.}, at: [<ffffffff8124171f>] sget_userns+0x2af/0x520

Caught by xfstests generic/413 which tried to mount with the unsupported
mount option dax.  Then xfstests generic/422 ran sync which deadlocks.

Signed-off-by: Martin Brandenburg <martin@omnibond.com>
Acked-by: Mike Marshall <hubcap@omnibond.com>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/orangefs/devorangefs-req.c |  9 +++++++--
 fs/orangefs/orangefs-kernel.h |  1 +
 fs/orangefs/super.c           | 23 ++++++++++++++++-------
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c
index c4ab6fdf17a0..e1534c9bab16 100644
--- a/fs/orangefs/devorangefs-req.c
+++ b/fs/orangefs/devorangefs-req.c
@@ -208,14 +208,19 @@ restart:
 				continue;
 			/*
 			 * Skip ops whose filesystem we don't know about unless
-			 * it is being mounted.
+			 * it is being mounted or unmounted.  It is possible for
+			 * a filesystem we don't know about to be unmounted if
+			 * it fails to mount in the kernel after userspace has
+			 * been sent the mount request.
 			 */
 			/* XXX: is there a better way to detect this? */
 			} else if (ret == -1 &&
 				   !(op->upcall.type ==
 					ORANGEFS_VFS_OP_FS_MOUNT ||
 				     op->upcall.type ==
-					ORANGEFS_VFS_OP_GETATTR)) {
+					ORANGEFS_VFS_OP_GETATTR ||
+				     op->upcall.type ==
+					ORANGEFS_VFS_OP_FS_UMOUNT)) {
 				gossip_debug(GOSSIP_DEV_DEBUG,
 				    "orangefs: skipping op tag %llu %s\n",
 				    llu(op->tag), get_opname_string(op));
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 5e48a0be9761..8afac46fcc87 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -249,6 +249,7 @@ struct orangefs_sb_info_s {
 	char devname[ORANGEFS_MAX_SERVER_ADDR_LEN];
 	struct super_block *sb;
 	int mount_pending;
+	int no_list;
 	struct list_head list;
 };
 
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index cd261c8de53a..629d8c917fa6 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -493,7 +493,7 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
 
 	if (ret) {
 		d = ERR_PTR(ret);
-		goto free_op;
+		goto free_sb_and_op;
 	}
 
 	/*
@@ -519,6 +519,9 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
 	spin_unlock(&orangefs_superblocks_lock);
 	op_release(new_op);
 
+	/* Must be removed from the list now. */
+	ORANGEFS_SB(sb)->no_list = 0;
+
 	if (orangefs_userspace_version >= 20906) {
 		new_op = op_alloc(ORANGEFS_VFS_OP_FEATURES);
 		if (!new_op)
@@ -533,6 +536,10 @@ struct dentry *orangefs_mount(struct file_system_type *fst,
 
 	return dget(sb->s_root);
 
+free_sb_and_op:
+	/* Will call orangefs_kill_sb with sb not in list. */
+	ORANGEFS_SB(sb)->no_list = 1;
+	deactivate_locked_super(sb);
 free_op:
 	gossip_err("orangefs_mount: mount request failed with %d\n", ret);
 	if (ret == -EINVAL) {
@@ -558,12 +565,14 @@ void orangefs_kill_sb(struct super_block *sb)
 	 */
 	 orangefs_unmount_sb(sb);
 
-	/* remove the sb from our list of orangefs specific sb's */
-
-	spin_lock(&orangefs_superblocks_lock);
-	__list_del_entry(&ORANGEFS_SB(sb)->list);	/* not list_del_init */
-	ORANGEFS_SB(sb)->list.prev = NULL;
-	spin_unlock(&orangefs_superblocks_lock);
+	if (!ORANGEFS_SB(sb)->no_list) {
+		/* remove the sb from our list of orangefs specific sb's */
+		spin_lock(&orangefs_superblocks_lock);
+		/* not list_del_init */
+		__list_del_entry(&ORANGEFS_SB(sb)->list);
+		ORANGEFS_SB(sb)->list.prev = NULL;
+		spin_unlock(&orangefs_superblocks_lock);
+	}
 
 	/*
 	 * make sure that ORANGEFS_DEV_REMOUNT_ALL loop that might've seen us

From 330c418638612d7658b6314e6a244fcb5f7efac5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 16 Apr 2017 23:17:37 +0900
Subject: [PATCH 409/410] Revert "cgroup: avoid attaching a cgroup root to two
 different superblocks"

This reverts commit bfb0b80db5f9dca5ac0a5fd0edb765ee555e5a8e.

Andrei reports CRIU test hangs with the patch applied.  The bug fixed
by the patch isn't too likely to trigger in actual uses.  Revert the
patch for now.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Andrei Vagin <avagin@virtuozzo.com>
Link: http://lkml.kernel.org/r/20170414232737.GC20350@outlook.office365.com
---
 kernel/cgroup/cgroup-v1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 12e19f0636ea..1dc22f6b49f5 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1146,7 +1146,7 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags,
 		 * path is super cold.  Let's just sleep a bit and retry.
 		 */
 		pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
-		if (IS_ERR_OR_NULL(pinned_sb) ||
+		if (IS_ERR(pinned_sb) ||
 		    !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
 			mutex_unlock(&cgroup_mutex);
 			if (!IS_ERR_OR_NULL(pinned_sb))

From 4f7d029b9bf009fbee76bb10c0c4351a1870d2f3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 16 Apr 2017 13:00:18 -0700
Subject: [PATCH 410/410] Linux 4.11-rc7

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index efa267a92ba6..5039b9148d15 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 11
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*