From b9688ec5e67b8434b20eb4afc2d2dfea0d9608a9 Mon Sep 17 00:00:00 2001
From: Jeffrey Walton <noloader@gmail.com>
Date: Wed, 26 Dec 2018 13:24:54 -0500
Subject: [PATCH] Fix "explicit template specialization cannot have a storage
 class" on Aarch64 debug builds

---
 adv_simd.h        | 32 ++++++++++++--------------------
 speck128_simd.cpp | 44 ++++++++++++++++++--------------------------
 speck64_simd.cpp  | 42 +++++++++++++++++++++---------------------
 3 files changed, 51 insertions(+), 67 deletions(-)
diff --git a/adv_simd.h b/adv_simd.h
index ce468012..41d68ff1 100644
--- a/adv_simd.h
+++ b/adv_simd.h
@@ -74,14 +74,6 @@
 # include "ppc_simd.h"
 #endif
 
-#ifndef CRYPTOPP_INLINE
-# if defined(CRYPTOPP_DEBUG)
-#  define CRYPTOPP_INLINE static
-# else
-#  define CRYPTOPP_INLINE inline
-# endif
-#endif
-
 // ************************ All block ciphers *********************** //
 
 ANONYMOUS_NAMESPACE_BEGIN
@@ -111,7 +103,7 @@ NAMESPACE_BEGIN(CryptoPP)
 /// \details The subkey type is usually word32 or word64. F2 and F6 must use the
 ///   same word type.
 template <typename F2, typename F6, typename W>
-CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
+inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
         const W *subKeys, size_t rounds, const byte *inBlocks,
         const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
 {
@@ -356,7 +348,7 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
 /// \details The subkey type is usually word32 or word64. F1 and F6 must use the
 ///   same word type.
 template <typename F1, typename F6, typename W>
-CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_6x1_NEON(F1 func1, F6 func6,
+inline size_t AdvancedProcessBlocks128_6x1_NEON(F1 func1, F6 func6,
             const W *subKeys, size_t rounds, const byte *inBlocks,
             const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
 {
@@ -518,7 +510,7 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_6x1_NEON(F1 func1, F6 func6,
 ///   vector type. The V parameter is used to avoid template argument
 ///   deduction/substitution failures.
 template <typename F1, typename F4, typename W, typename V>
-CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_4x1_NEON(F1 func1, F4 func4,
+inline size_t AdvancedProcessBlocks128_4x1_NEON(F1 func1, F4 func4,
             const V& unused, const W *subKeys, size_t rounds, const byte *inBlocks,
             const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
 {
@@ -659,7 +651,7 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_4x1_NEON(F1 func1, F4 func4,
 /// \details The subkey type is usually word32 or word64. F2 and F6 must use the
 ///   same word type.
 template <typename F2, typename F6, typename W>
-CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
+inline size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
             const W *subKeys, size_t rounds, const byte *inBlocks,
             const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
 {
@@ -890,7 +882,7 @@ NAMESPACE_BEGIN(CryptoPP)
 /// \details The subkey type is usually word32 or word64. F1 and F2 must use the
 ///   same word type.
 template <typename F1, typename F2, typename W>
-CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_2x1_SSE(F1 func1, F2 func2,
+inline size_t AdvancedProcessBlocks64_2x1_SSE(F1 func1, F2 func2,
         MAYBE_CONST W *subKeys, size_t rounds, const byte *inBlocks,
         const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
 {
@@ -1045,7 +1037,7 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_2x1_SSE(F1 func1, F2 func2,
 /// \details The subkey type is usually word32 or word64. F2 and F6 must use the
 ///   same word type.
 template <typename F2, typename F6, typename W>
-CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6,
+inline size_t AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6,
         MAYBE_CONST W *subKeys, size_t rounds, const byte *inBlocks,
         const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
 {
@@ -1296,7 +1288,7 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6,
 /// \details The subkey type is usually word32 or word64. F2 and F6 must use the
 ///   same word type.
 template <typename F2, typename F6, typename W>
-CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6,
+inline size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6,
         MAYBE_CONST W *subKeys, size_t rounds, const byte *inBlocks,
         const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
 {
@@ -1491,7 +1483,7 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6,
 /// \details The subkey type is usually word32 or word64. F1 and F4 must use the
 ///   same word type.
 template <typename F1, typename F4, typename W>
-CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4,
+inline size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4,
         MAYBE_CONST W *subKeys, size_t rounds, const byte *inBlocks,
         const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
 {
@@ -1622,7 +1614,7 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4,
 /// \details The subkey type is usually word32 or word64. F1 and F4 must use the
 ///   same word type.
 template <typename F1, typename F4, typename W>
-CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_4x1_SSE(F1 func1, F4 func4,
+inline size_t AdvancedProcessBlocks64_4x1_SSE(F1 func1, F4 func4,
     MAYBE_CONST W *subKeys, size_t rounds, const byte *inBlocks,
     const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
 {
@@ -1805,7 +1797,7 @@ NAMESPACE_BEGIN(CryptoPP)
 /// \details The subkey type is usually word32 or word64. F2 and F6 must use the
 ///   same word type.
 template <typename F2, typename F6, typename W>
-CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6,
+inline size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6,
         const W *subKeys, size_t rounds, const byte *inBlocks,
         const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
 {
@@ -2076,7 +2068,7 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks64_6x2_ALTIVEC(F2 func2, F6 func6,
 /// \details The subkey type is usually word32 or word64. F1 and F4 must use the
 ///   same word type.
 template <typename F1, typename F4, typename W>
-CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_4x1_ALTIVEC(F1 func1, F4 func4,
+inline size_t AdvancedProcessBlocks128_4x1_ALTIVEC(F1 func1, F4 func4,
         const W *subKeys, size_t rounds, const byte *inBlocks,
         const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
 {
@@ -2221,7 +2213,7 @@ CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_4x1_ALTIVEC(F1 func1, F4 func4,
 /// \details The subkey type is usually word32 or word64. F1 and F6 must use the
 ///   same word type.
 template <typename F1, typename F6, typename W>
-CRYPTOPP_INLINE size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6,
+inline size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6,
         const W *subKeys, size_t rounds, const byte *inBlocks,
         const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
 {
diff --git a/speck128_simd.cpp b/speck128_simd.cpp
index d59b1b1e..e42f62f5 100644
--- a/speck128_simd.cpp
+++ b/speck128_simd.cpp
@@ -12,14 +12,6 @@
 #include "misc.h"
 #include "adv_simd.h"
 
-#ifndef CRYPTOPP_INLINE
-# if defined(CRYPTOPP_DEBUG)
-#  define CRYPTOPP_INLINE static
-# else
-#  define CRYPTOPP_INLINE inline
-# endif
-#endif
-
 // Uncomment for benchmarking C++ against SSE or NEON.
 // Do so in both speck.cpp and speck-simd.cpp.
 // #undef CRYPTOPP_SSSE3_AVAILABLE
@@ -68,7 +60,7 @@ using CryptoPP::word64;
 #if (CRYPTOPP_ARM_NEON_AVAILABLE)
 
 template <class T>
-CRYPTOPP_INLINE T UnpackHigh64(const T& a, const T& b)
+inline T UnpackHigh64(const T& a, const T& b)
 {
     const uint64x1_t x(vget_high_u64((uint64x2_t)a));
     const uint64x1_t y(vget_high_u64((uint64x2_t)b));
@@ -76,7 +68,7 @@ CRYPTOPP_INLINE T UnpackHigh64(const T& a, const T& b)
 }
 
 template <class T>
-CRYPTOPP_INLINE T UnpackLow64(const T& a, const T& b)
+inline T UnpackLow64(const T& a, const T& b)
 {
     const uint64x1_t x(vget_low_u64((uint64x2_t)a));
     const uint64x1_t y(vget_low_u64((uint64x2_t)b));
@@ -84,7 +76,7 @@ CRYPTOPP_INLINE T UnpackLow64(const T& a, const T& b)
 }
 
 template <unsigned int R>
-CRYPTOPP_INLINE uint64x2_t RotateLeft64(const uint64x2_t& val)
+inline uint64x2_t RotateLeft64(const uint64x2_t& val)
 {
     const uint64x2_t a(vshlq_n_u64(val, R));
     const uint64x2_t b(vshrq_n_u64(val, 64 - R));
@@ -92,7 +84,7 @@ CRYPTOPP_INLINE uint64x2_t RotateLeft64(const uint64x2_t& val)
 }
 
 template <unsigned int R>
-CRYPTOPP_INLINE uint64x2_t RotateRight64(const uint64x2_t& val)
+inline uint64x2_t RotateRight64(const uint64x2_t& val)
 {
     const uint64x2_t a(vshlq_n_u64(val, 64 - R));
     const uint64x2_t b(vshrq_n_u64(val, R));
@@ -102,7 +94,7 @@ CRYPTOPP_INLINE uint64x2_t RotateRight64(const uint64x2_t& val)
 #if defined(__aarch32__) || defined(__aarch64__)
 // Faster than two Shifts and an Or. Thanks to Louis Wingers and Bryan Weeks.
 template <>
-CRYPTOPP_INLINE uint64x2_t RotateLeft64<8>(const uint64x2_t& val)
+inline uint64x2_t RotateLeft64<8>(const uint64x2_t& val)
 {
 #if (CRYPTOPP_BIG_ENDIAN)
     const uint8_t maskb[16] = { 14,13,12,11, 10,9,8,15, 6,5,4,3, 2,1,0,7 };
@@ -118,7 +110,7 @@ CRYPTOPP_INLINE uint64x2_t RotateLeft64<8>(const uint64x2_t& val)
 
 // Faster than two Shifts and an Or. Thanks to Louis Wingers and Bryan Weeks.
 template <>
-CRYPTOPP_INLINE uint64x2_t RotateRight64<8>(const uint64x2_t& val)
+inline uint64x2_t RotateRight64<8>(const uint64x2_t& val)
 {
 #if (CRYPTOPP_BIG_ENDIAN)
     const uint8_t maskb[16] = { 8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1 };
@@ -133,7 +125,7 @@ CRYPTOPP_INLINE uint64x2_t RotateRight64<8>(const uint64x2_t& val)
 }
 #endif
 
-CRYPTOPP_INLINE void SPECK128_Enc_Block(uint64x2_t &block0, uint64x2_t &block1,
+inline void SPECK128_Enc_Block(uint64x2_t &block0, uint64x2_t &block1,
     const word64 *subkeys, unsigned int rounds)
 {
     // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
@@ -156,7 +148,7 @@ CRYPTOPP_INLINE void SPECK128_Enc_Block(uint64x2_t &block0, uint64x2_t &block1,
     block1 = UnpackHigh64(y1, x1);
 }
 
-CRYPTOPP_INLINE void SPECK128_Enc_6_Blocks(uint64x2_t &block0, uint64x2_t &block1,
+inline void SPECK128_Enc_6_Blocks(uint64x2_t &block0, uint64x2_t &block1,
     uint64x2_t &block2, uint64x2_t &block3, uint64x2_t &block4, uint64x2_t &block5,
     const word64 *subkeys, unsigned int rounds)
 {
@@ -198,7 +190,7 @@ CRYPTOPP_INLINE void SPECK128_Enc_6_Blocks(uint64x2_t &block0, uint64x2_t &block
     block5 = UnpackHigh64(y3, x3);
 }
 
-CRYPTOPP_INLINE void SPECK128_Dec_Block(uint64x2_t &block0, uint64x2_t &block1,
+inline void SPECK128_Dec_Block(uint64x2_t &block0, uint64x2_t &block1,
     const word64 *subkeys, unsigned int rounds)
 {
     // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
@@ -221,7 +213,7 @@ CRYPTOPP_INLINE void SPECK128_Dec_Block(uint64x2_t &block0, uint64x2_t &block1,
     block1 = UnpackHigh64(y1, x1);
 }
 
-CRYPTOPP_INLINE void SPECK128_Dec_6_Blocks(uint64x2_t &block0, uint64x2_t &block1,
+inline void SPECK128_Dec_6_Blocks(uint64x2_t &block0, uint64x2_t &block1,
     uint64x2_t &block2, uint64x2_t &block3, uint64x2_t &block4, uint64x2_t &block5,
     const word64 *subkeys, unsigned int rounds)
 {
@@ -286,7 +278,7 @@ CRYPTOPP_INLINE void SPECK128_Dec_6_Blocks(uint64x2_t &block0, uint64x2_t &block
 #endif
 
 template <unsigned int R>
-CRYPTOPP_INLINE __m128i RotateLeft64(const __m128i& val)
+inline __m128i RotateLeft64(const __m128i& val)
 {
 #if defined(CRYPTOPP_AVX512_ROTATE)
     return _mm_rol_epi64(val, R);
@@ -299,7 +291,7 @@ CRYPTOPP_INLINE __m128i RotateLeft64(const __m128i& val)
 }
 
 template <unsigned int R>
-CRYPTOPP_INLINE __m128i RotateRight64(const __m128i& val)
+inline __m128i RotateRight64(const __m128i& val)
 {
 #if defined(CRYPTOPP_AVX512_ROTATE)
     return _mm_ror_epi64(val, R);
@@ -335,7 +327,7 @@ __m128i RotateRight64<8>(const __m128i& val)
 #endif
 }
 
-CRYPTOPP_INLINE void SPECK128_Enc_Block(__m128i &block0, __m128i &block1,
+inline void SPECK128_Enc_Block(__m128i &block0, __m128i &block1,
     const word64 *subkeys, unsigned int rounds)
 {
     // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
@@ -359,7 +351,7 @@ CRYPTOPP_INLINE void SPECK128_Enc_Block(__m128i &block0, __m128i &block1,
     block1 = _mm_unpackhi_epi64(y1, x1);
 }
 
-CRYPTOPP_INLINE void SPECK128_Enc_6_Blocks(__m128i &block0, __m128i &block1,
+inline void SPECK128_Enc_6_Blocks(__m128i &block0, __m128i &block1,
     __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
     const word64 *subkeys, unsigned int rounds)
 {
@@ -402,7 +394,7 @@ CRYPTOPP_INLINE void SPECK128_Enc_6_Blocks(__m128i &block0, __m128i &block1,
     block5 = _mm_unpackhi_epi64(y3, x3);
 }
 
-CRYPTOPP_INLINE void SPECK128_Dec_Block(__m128i &block0, __m128i &block1,
+inline void SPECK128_Dec_Block(__m128i &block0, __m128i &block1,
     const word64 *subkeys, unsigned int rounds)
 {
     // [A1 A2][B1 B2] ... => [A1 B1][A2 B2] ...
@@ -426,7 +418,7 @@ CRYPTOPP_INLINE void SPECK128_Dec_Block(__m128i &block0, __m128i &block1,
     block1 = _mm_unpackhi_epi64(y1, x1);
 }
 
-CRYPTOPP_INLINE void SPECK128_Dec_6_Blocks(__m128i &block0, __m128i &block1,
+inline void SPECK128_Dec_6_Blocks(__m128i &block0, __m128i &block1,
     __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
     const word64 *subkeys, unsigned int rounds)
 {
@@ -486,7 +478,7 @@ using CryptoPP::VecPermute;
 
 // Rotate left by bit count
 template<unsigned int C>
-CRYPTOPP_INLINE uint64x2_p RotateLeft64(const uint64x2_p val)
+inline uint64x2_p RotateLeft64(const uint64x2_p val)
 {
     const uint64x2_p m = {C, C};
     return vec_rl(val, m);
@@ -494,7 +486,7 @@ CRYPTOPP_INLINE uint64x2_p RotateLeft64(const uint64x2_p val)
 
 // Rotate right by bit count
 template<unsigned int C>
-CRYPTOPP_INLINE uint64x2_p RotateRight64(const uint64x2_p val)
+inline uint64x2_p RotateRight64(const uint64x2_p val)
 {
     const uint64x2_p m = {64-C, 64-C};
     return vec_rl(val, m);
diff --git a/speck64_simd.cpp b/speck64_simd.cpp
index b8f8b2b2..36d76ff0 100644
--- a/speck64_simd.cpp
+++ b/speck64_simd.cpp
@@ -50,11 +50,4 @@
 # include "ppc_simd.h"
 #endif
 
-#ifndef CRYPTOPP_INLINE
-# if defined(CRYPTOPP_DEBUG)
-#  define CRYPTOPP_INLINE static
-# else
-#  define CRYPTOPP_INLINE inline
-# endif
-#endif
 
@@ -72,7 +72,7 @@ using CryptoPP::word64;
 #if (CRYPTOPP_ARM_NEON_AVAILABLE)
 
 template <class T>
-CRYPTOPP_INLINE T UnpackHigh32(const T& a, const T& b)
+inline T UnpackHigh32(const T& a, const T& b)
 {
     const uint32x2_t x(vget_high_u32((uint32x4_t)a));
     const uint32x2_t y(vget_high_u32((uint32x4_t)b));
@@ -81,7 +81,7 @@ CRYPTOPP_INLINE T UnpackHigh32(const T& a, const T& b)
 }
 
 template <class T>
-CRYPTOPP_INLINE T UnpackLow32(const T& a, const T& b)
+inline T UnpackLow32(const T& a, const T& b)
 {
     const uint32x2_t x(vget_low_u32((uint32x4_t)a));
     const uint32x2_t y(vget_low_u32((uint32x4_t)b));
@@ -90,7 +90,7 @@ CRYPTOPP_INLINE T UnpackLow32(const T& a, const T& b)
 }
 
 template <unsigned int R>
-CRYPTOPP_INLINE uint32x4_t RotateLeft32(const uint32x4_t& val)
+inline uint32x4_t RotateLeft32(const uint32x4_t& val)
 {
     const uint32x4_t a(vshlq_n_u32(val, R));
     const uint32x4_t b(vshrq_n_u32(val, 32 - R));
@@ -98,7 +98,7 @@ CRYPTOPP_INLINE uint32x4_t RotateLeft32(const uint32x4_t& val)
 }
 
 template <unsigned int R>
-CRYPTOPP_INLINE uint32x4_t RotateRight32(const uint32x4_t& val)
+inline uint32x4_t RotateRight32(const uint32x4_t& val)
 {
     const uint32x4_t a(vshlq_n_u32(val, 32 - R));
     const uint32x4_t b(vshrq_n_u32(val, R));
@@ -108,7 +108,7 @@ CRYPTOPP_INLINE uint32x4_t RotateRight32(const uint32x4_t& val)
 #if defined(__aarch32__) || defined(__aarch64__)
 // Faster than two Shifts and an Or. Thanks to Louis Wingers and Bryan Weeks.
 template <>
-CRYPTOPP_INLINE uint32x4_t RotateLeft32<8>(const uint32x4_t& val)
+inline uint32x4_t RotateLeft32<8>(const uint32x4_t& val)
 {
 #if (CRYPTOPP_BIG_ENDIAN)
     const uint8_t maskb[16] = { 14,13,12,15, 10,9,8,11, 6,5,4,7, 2,1,0,3 };
@@ -124,7 +124,7 @@ CRYPTOPP_INLINE uint32x4_t RotateLeft32<8>(const uint32x4_t& val)
 
 // Faster than two Shifts and an Or. Thanks to Louis Wingers and Bryan Weeks.
 template <>
-CRYPTOPP_INLINE uint32x4_t RotateRight32<8>(const uint32x4_t& val)
+inline uint32x4_t RotateRight32<8>(const uint32x4_t& val)
 {
 #if (CRYPTOPP_BIG_ENDIAN)
     const uint8_t maskb[16] = { 12,15,14,13, 8,11,10,9, 4,7,6,5, 0,3,2,1 };
@@ -139,7 +139,7 @@ CRYPTOPP_INLINE uint32x4_t RotateRight32<8>(const uint32x4_t& val)
 }
 #endif  // Aarch32 or Aarch64
 
-CRYPTOPP_INLINE void SPECK64_Enc_Block(uint32x4_t &block0, uint32x4_t &block1,
+inline void SPECK64_Enc_Block(uint32x4_t &block0, uint32x4_t &block1,
     const word32 *subkeys, unsigned int rounds)
 {
     // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
@@ -162,7 +162,7 @@ CRYPTOPP_INLINE void SPECK64_Enc_Block(uint32x4_t &block0, uint32x4_t &block1,
     block1 = UnpackHigh32(y1, x1);
 }
 
-CRYPTOPP_INLINE void SPECK64_Dec_Block(uint32x4_t &block0, uint32x4_t &block1,
+inline void SPECK64_Dec_Block(uint32x4_t &block0, uint32x4_t &block1,
     const word32 *subkeys, unsigned int rounds)
 {
     // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
@@ -185,7 +185,7 @@ CRYPTOPP_INLINE void SPECK64_Dec_Block(uint32x4_t &block0, uint32x4_t &block1,
     block1 = UnpackHigh32(y1, x1);
 }
 
-CRYPTOPP_INLINE void SPECK64_Enc_6_Blocks(uint32x4_t &block0, uint32x4_t &block1,
+inline void SPECK64_Enc_6_Blocks(uint32x4_t &block0, uint32x4_t &block1,
     uint32x4_t &block2, uint32x4_t &block3, uint32x4_t &block4, uint32x4_t &block5,
     const word32 *subkeys, unsigned int rounds)
 {
@@ -227,7 +227,7 @@ CRYPTOPP_INLINE void SPECK64_Enc_6_Blocks(uint32x4_t &block0, uint32x4_t &block1
     block5 = UnpackHigh32(y3, x3);
 }
 
-CRYPTOPP_INLINE void SPECK64_Dec_6_Blocks(uint32x4_t &block0, uint32x4_t &block1,
+inline void SPECK64_Dec_6_Blocks(uint32x4_t &block0, uint32x4_t &block1,
     uint32x4_t &block2, uint32x4_t &block3, uint32x4_t &block4, uint32x4_t &block5,
     const word32 *subkeys, unsigned int rounds)
 {
@@ -276,7 +276,7 @@ CRYPTOPP_INLINE void SPECK64_Dec_6_Blocks(uint32x4_t &block0, uint32x4_t &block1
 #if defined(CRYPTOPP_SSE41_AVAILABLE)
 
 template <unsigned int R>
-CRYPTOPP_INLINE __m128i RotateLeft32(const __m128i& val)
+inline __m128i RotateLeft32(const __m128i& val)
 {
 #if defined(__XOP__)
     return _mm_roti_epi32(val, R);
@@ -287,7 +287,7 @@ CRYPTOPP_INLINE __m128i RotateLeft32(const __m128i& val)
 }
 
 template <unsigned int R>
-CRYPTOPP_INLINE __m128i RotateRight32(const __m128i& val)
+inline __m128i RotateRight32(const __m128i& val)
 {
 #if defined(__XOP__)
     return _mm_roti_epi32(val, 32-R);
@@ -321,7 +321,7 @@ __m128i RotateRight32<8>(const __m128i& val)
 #endif
 }
 
-CRYPTOPP_INLINE void SPECK64_Enc_Block(__m128i &block0, __m128i &block1,
+inline void SPECK64_Enc_Block(__m128i &block0, __m128i &block1,
     const word32 *subkeys, unsigned int rounds)
 {
     // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
@@ -347,7 +347,7 @@ CRYPTOPP_INLINE void SPECK64_Enc_Block(__m128i &block0, __m128i &block1,
     block1 = _mm_unpackhi_epi32(y1, x1);
 }
 
-CRYPTOPP_INLINE void SPECK64_Dec_Block(__m128i &block0, __m128i &block1,
+inline void SPECK64_Dec_Block(__m128i &block0, __m128i &block1,
     const word32 *subkeys, unsigned int rounds)
 {
     // [A1 A2 A3 A4][B1 B2 B3 B4] ... => [A1 A3 B1 B3][A2 A4 B2 B4] ...
@@ -373,7 +373,7 @@ CRYPTOPP_INLINE void SPECK64_Dec_Block(__m128i &block0, __m128i &block1,
     block1 = _mm_unpackhi_epi32(y1, x1);
 }
 
-CRYPTOPP_INLINE void SPECK64_Enc_6_Blocks(__m128i &block0, __m128i &block1,
+inline void SPECK64_Enc_6_Blocks(__m128i &block0, __m128i &block1,
     __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
     const word32 *subkeys, unsigned int rounds)
 {
@@ -424,7 +424,7 @@ CRYPTOPP_INLINE void SPECK64_Enc_6_Blocks(__m128i &block0, __m128i &block1,
     block5 = _mm_unpackhi_epi32(y3, x3);
 }
 
-CRYPTOPP_INLINE void SPECK64_Dec_6_Blocks(__m128i &block0, __m128i &block1,
+inline void SPECK64_Dec_6_Blocks(__m128i &block0, __m128i &block1,
     __m128i &block2, __m128i &block3, __m128i &block4, __m128i &block5,
     const word32 *subkeys, unsigned int rounds)
 {
@@ -491,7 +491,7 @@ using CryptoPP::VecPermute;
 
 // Rotate left by bit count
 template<unsigned int C>
-CRYPTOPP_INLINE uint32x4_p RotateLeft32(const uint32x4_p val)
+inline uint32x4_p RotateLeft32(const uint32x4_p val)
 {
     const uint32x4_p m = {C, C, C, C};
     return vec_rl(val, m);
@@ -499,7 +499,7 @@ CRYPTOPP_INLINE uint32x4_p RotateLeft32(const uint32x4_p val)
 
 // Rotate right by bit count
 template<unsigned int C>
-CRYPTOPP_INLINE uint32x4_p RotateRight32(const uint32x4_p val)
+inline uint32x4_p RotateRight32(const uint32x4_p val)
 {
     const uint32x4_p m = {32-C, 32-C, 32-C, 32-C};
     return vec_rl(val, m);