From d9797c414a95403364db9f834a2e59a7ac41e5d8 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sat, 23 Jun 2018 14:19:18 -0400 Subject: [PATCH] Add CRYPTOPP_LEA_ARM_SPLAT_ROUNDKEYS for LEA --- lea-simd.cpp | 8 +++++++- lea.cpp | 4 ++++ lea.h | 8 ++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/lea-simd.cpp b/lea-simd.cpp index 8dacdf56..703930c9 100644 --- a/lea-simd.cpp +++ b/lea-simd.cpp @@ -140,7 +140,11 @@ uint32x4_t UnpackHigh64(uint32x4_t a, uint32x4_t b) template inline uint32x4_t LoadKey(const word32 rkey[]) { +#if (CRYPTOPP_LEA_ARM_SPLAT_ROUNDKEYS) return vld1q_u32(&rkey[IDX*4]); +#else + return vdupq_n_u32(rkey[IDX]); +#endif } template @@ -1012,15 +1016,17 @@ size_t LEA_Dec_AdvancedProcessBlocks_SSSE3(const word32* subKeys, size_t rounds, #endif // CRYPTOPP_SSSE3_AVAILABLE #if defined(CRYPTOPP_ARM_NEON_AVAILABLE) +# if (CRYPTOPP_LEA_ARM_SPLAT_ROUNDKEYS) void LEA_SplatKeys_NEON(SecBlock& rkeys) { SecBlock temp(rkeys.size() * 4); for (size_t i=0, j=0; i& rkeys); +# endif // CRYPTOPP_LEA_ARM_SPLAT_ROUNDKEYS extern size_t LEA_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds, const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags); @@ -616,8 +618,10 @@ void LEA::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLength, con LEA_SplatKeys_SSSE3(m_rkey); #endif #if (CRYPTOPP_LEA_ADVANCED_PROCESS_BLOCKS) && (CRYPTOPP_ARM_NEON_AVAILABLE) +# if (CRYPTOPP_LEA_ARM_SPLAT_ROUNDKEYS) if (HasNEON()) LEA_SplatKeys_NEON(m_rkey); +# endif // CRYPTOPP_LEA_ARM_SPLAT_ROUNDKEYS #endif } diff --git a/lea.h b/lea.h index 02fccd45..5b9f74b0 100644 --- a/lea.h +++ b/lea.h @@ -19,6 +19,14 @@ # define CRYPTOPP_LEA_ADVANCED_PROCESS_BLOCKS 1 #endif +// Define this if you want to pre-splat the round key table +// for NEON and Aarch64. Pre-splatting the round key increases +// performance by about 0.7 cpb on ARM server boards like an +// AMD Opteron A1100. However, it crushes performance on ARM +// dev-boards like LeMaker HiKey and Pine64. HiKey and Pine64 +// run about 8 cpb slower when pre-splatting the round keys. +// # define CRYPTOPP_LEA_ARM_SPLAT_ROUNDKEYS 1 + NAMESPACE_BEGIN(CryptoPP) /// \brief LEA block cipher information