From 3c7b5858103a41c6c851a7722c73c6fb290104c7 Mon Sep 17 00:00:00 2001
From: Jeffrey Walton
Date: Thu, 19 May 2016 04:18:35 -0400
Subject: [PATCH] Add test for AES, SHA1 and SHA2 from ARM Crypto extension

---
Reviewer notes (free text, not part of the commit message):

* cpu.cpp: the single ARM "Crypto" probe is split into three. g_hasCrypto,
  TryCrypto() and SigIllHandlerCrypto/s_jmpNoCrypto are replaced by
  g_hasAES, g_hasSHA1, g_hasSHA2, by TryAES(), TrySHA1(), TrySHA2(), and by
  one SIGILL handler plus jmp_buf per probe. DetectArmFeatures() now fills
  all three flags.
* cpu.h: HasCrypto() is removed; HasAES(), HasSHA1() and HasSHA2() are
  added. Callers of HasCrypto() have to move to the new names; validat1.cpp
  is updated in this patch.
* TrySHA2() exercises only the vsha256* intrinsics.
* blake2.cpp gains an alignment assert on `input`; blake2.h moves `length`
  after `buffer` in BLAKE2_State.
* In the signal()-based probe branches, a SIG_ERR result from signal() only
  sets result = false; control still reaches setjmp() and the probe
  intrinsics, and signal(SIGILL, oldHandler) is then called with SIG_ERR.
* NOTE(review): this copy looks like it lost text that was inside angle
  brackets (e.g. `GetAlignmentOf()` has no template argument and `From:`
  has no address), and the blake2.cpp hunk shows fewer context lines than
  its header declares - confirm against the upstream commit before applying.

 blake2.cpp   |   1 +
 blake2.h     |   2 +-
 cpu.cpp      | 149 ++++++++++++++++++++++++++++++++++++++++++++-------
 cpu.h        |  46 +++++++++++++---
 validat1.cpp |   6 ++-
 5 files changed, 173 insertions(+), 31 deletions(-)

diff --git a/blake2.cpp b/blake2.cpp
index e72f8648..e1d8d6e7 100644
--- a/blake2.cpp
+++ b/blake2.cpp
@@ -3933,6 +3933,7 @@ static void BLAKE2_NEON_Compress32(const byte* input, BLAKE2_State& state)
 {
+	assert(IsAlignedOn(input,GetAlignmentOf()));
 	assert(IsAlignedOn(&state.h[0],GetAlignmentOf()));
 	assert(IsAlignedOn(&state.h[4],GetAlignmentOf()));
 	assert(IsAlignedOn(&state.t[0],GetAlignmentOf()));
diff --git a/blake2.h b/blake2.h
index 1f70aa70..d89ee06c 100644
--- a/blake2.h
+++ b/blake2.h
@@ -143,8 +143,8 @@ struct CRYPTOPP_NO_VTABLE BLAKE2_State
 
 	// SSE2, SSE4 and NEON depend upon t[] and f[] being side-by-side
 	W h[8], t[2], f[2];
-	size_t length;
 	byte buffer[BLOCKSIZE];
+	size_t length;
 };
 
 //! \class BLAKE2_Base
diff --git a/cpu.cpp b/cpu.cpp
index 1ee18a8e..be34be96 100644
--- a/cpu.cpp
+++ b/cpu.cpp
@@ -94,8 +94,7 @@ bool CpuId(word32 input, word32 output[4])
 	return true;
 #else
 	// longjmp and clobber warnings. Volatile is required.
-	// http://github.com/weidai11/cryptopp/issues/24
-	// http://stackoverflow.com/q/7721854
+	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
 	volatile bool result = true;
 
 	SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID);
@@ -149,8 +148,7 @@ static bool TrySSE2()
 	return true;
 #else
 	// longjmp and clobber warnings. Volatile is required.
-	// http://github.com/weidai11/cryptopp/issues/24
-	// http://stackoverflow.com/q/7721854
+	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
 	volatile bool result = true;
 
 	SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2);
@@ -262,7 +260,7 @@ void DetectX86Features()
 #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
 
 bool g_ArmDetectionDone = false;
-bool g_hasNEON = false, g_hasCRC32 = false, g_hasCrypto = false;
+bool g_hasNEON = false, g_hasCRC32 = false, g_hasAES = false, g_hasSHA1 = false, g_hasSHA2 = false;
 
 word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
 
@@ -292,10 +290,22 @@ extern "C" {
 		longjmp(s_jmpNoCRC32, 1);
 	}
 
-	static jmp_buf s_jmpNoCrypto;
-	static void SigIllHandlerCrypto(int)
+	static jmp_buf s_jmpNoAES;
+	static void SigIllHandlerAES(int)
 	{
-		longjmp(s_jmpNoCrypto, 1);
+		longjmp(s_jmpNoAES, 1);
+	}
+
+	static jmp_buf s_jmpNoSHA1;
+	static void SigIllHandlerSHA1(int)
+	{
+		longjmp(s_jmpNoSHA1, 1);
+	}
+
+	static jmp_buf s_jmpNoSHA2;
+	static void SigIllHandlerSHA2(int)
+	{
+		longjmp(s_jmpNoSHA2, 1);
 	}
 };
 #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
@@ -325,8 +335,7 @@ static bool TryNEON()
 	return true;
 # else
 	// longjmp and clobber warnings. Volatile is required.
-	// http://github.com/weidai11/cryptopp/issues/24
-	// http://stackoverflow.com/q/7721854
+	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
 	volatile bool result = true;
 
 	SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
@@ -376,8 +385,7 @@ static bool TryCRC32()
 	return true;
 # else
 	// longjmp and clobber warnings. Volatile is required.
-	// http://github.com/weidai11/cryptopp/issues/24
-	// http://stackoverflow.com/q/7721854
+	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
 	volatile bool result = true;
 
 	SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
@@ -402,7 +410,7 @@ static bool TryCRC32()
 #endif // CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE
 }
 
-static bool TryCrypto()
+static bool TryAES()
 {
 #if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
 # if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
@@ -412,8 +420,7 @@ static bool TryCrypto()
 		static const uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
 		uint8x16_t r1 = vaeseq_u8(data, key);
 		uint8x16_t r2 = vaesdq_u8(data, key);
-
-		//
+		CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2);
 	}
 	__except (EXCEPTION_EXECUTE_HANDLER)
 	{
@@ -422,21 +429,121 @@ static bool TryCrypto()
 	return true;
 # else
 	// longjmp and clobber warnings. Volatile is required.
-	// http://github.com/weidai11/cryptopp/issues/24
-	// http://stackoverflow.com/q/7721854
+	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
 	volatile bool result = true;
 
-	SigHandler oldHandler = signal(SIGILL, SigIllHandlerCrypto);
+	SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES);
 	if (oldHandler == SIG_ERR)
 		result = false;
 
-	if (setjmp(s_jmpNoCrypto))
+	if (setjmp(s_jmpNoAES))
 		result = false;
 	else
 	{
 		static const uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
 		uint8x16_t r1 = vaeseq_u8(data, key);
 		uint8x16_t r2 = vaesdq_u8(data, key);
+		CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2);
+	}
+
+	signal(SIGILL, oldHandler);
+	return result;
+# endif
+#else
+	return false;
+#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
+}
+
+static bool TrySHA1()
+{
+#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
+# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
+	__try
+	{
+		static const uint32x4_t data = vdupq_n_u32(0);
+		static const uint32_t hash = 0x0;
+
+		uint32x4_t r1 = vsha1cq_u32 (data, hash, data);
+		uint32x4_t r2 = vsha1mq_u32 (data, hash, data);
+		uint32x4_t r3 = vsha1pq_u32 (data, hash, data);
+		CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3);
+	}
+	__except (EXCEPTION_EXECUTE_HANDLER)
+	{
+		return false;
+	}
+	return true;
+# else
+	// longjmp and clobber warnings. Volatile is required.
+	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
+	volatile bool result = true;
+
+	SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1);
+	if (oldHandler == SIG_ERR)
+		result = false;
+
+	if (setjmp(s_jmpNoSHA1))
+		result = false;
+	else
+	{
+		static const uint32x4_t data = vdupq_n_u32(0);
+		static const uint32_t hash = 0x0;
+
+		uint32x4_t r1 = vsha1cq_u32 (data, hash, data);
+		uint32x4_t r2 = vsha1mq_u32 (data, hash, data);
+		uint32x4_t r3 = vsha1pq_u32 (data, hash, data);
+		CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3);
+	}
+
+	signal(SIGILL, oldHandler);
+	return result;
+# endif
+#else
+	return false;
+#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
+}
+
+static bool TrySHA2()
+{
+#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
+# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
+	__try
+	{
+		static const uint32x4_t data = vdupq_n_u32(0);
+		static const uint32x4_t hash = vdupq_n_u32(0);
+
+		uint32x4_t r1 = vsha256hq_u32 (hash, hash, data);
+		uint32x4_t r2 = vsha256h2q_u32 (hash, hash, data);
+		uint32x4_t r3 = vsha256su0q_u32 (data, data);
+		uint32x4_t r4 = vsha256su1q_u32 (data, data, data);
+		CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3), CRYPTOPP_UNUSED(r4);
+	}
+	__except (EXCEPTION_EXECUTE_HANDLER)
+	{
+		return false;
+	}
+	return true;
+# else
+	// longjmp and clobber warnings. Volatile is required.
+	// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
+	volatile bool result = true;
+
+	SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2);
+	if (oldHandler == SIG_ERR)
+		result = false;
+
+	if (setjmp(s_jmpNoSHA2))
+		result = false;
+	else
+	{
+		static const uint32x4_t data = vdupq_n_u32(0);
+		static const uint32x4_t hash = vdupq_n_u32(0);
+
+		uint32x4_t r1 = vsha256hq_u32 (hash, hash, data);
+		uint32x4_t r2 = vsha256h2q_u32 (hash, hash, data);
+		uint32x4_t r3 = vsha256su0q_u32 (data, data);
+		uint32x4_t r4 = vsha256su1q_u32 (data, data, data);
+		CRYPTOPP_UNUSED(r1), CRYPTOPP_UNUSED(r2), CRYPTOPP_UNUSED(r3), CRYPTOPP_UNUSED(r4);
 	}
 
 	signal(SIGILL, oldHandler);
@@ -457,7 +564,9 @@ void DetectArmFeatures()
 {
 	g_hasNEON = TryNEON();
 	g_hasCRC32 = TryCRC32();
-	g_hasCrypto = TryCrypto();
+	g_hasAES = TryAES();
+	g_hasSHA1 = TrySHA1();
+	g_hasSHA2 = TrySHA2();
 
 	*((volatile bool*)&g_ArmDetectionDone) = true;
 }
diff --git a/cpu.h b/cpu.h
index 7d0b4d7e..cbb80d9a 100644
--- a/cpu.h
+++ b/cpu.h
@@ -240,7 +240,7 @@ inline int GetCacheLineSize()
 #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
 
 extern bool g_ArmDetectionDone;
-extern bool g_hasNEON, g_hasCRC32, g_hasCrypto;
+extern bool g_hasNEON, g_hasCRC32, g_hasAES, g_hasSHA1, g_hasSHA2;
 void CRYPTOPP_API DetectArmFeatures();
 
 //! \brief Determine if an ARM processor has Advanced SIMD available
@@ -271,19 +271,49 @@ inline bool HasCRC32()
 	return g_hasCRC32;
 }
 
-//! \brief Determine if an ARM processor has Crypto available
-//! \returns true if the hardware is capable of Crypto at runtime, false otherwise.
-//! \details Crypto instructions provide access to the processor's AES, SHA-1, SHA-224 and SHA-256 intructions.
-//!  They are provided by ARM C Language Extensions 2.0 (ACLE 2.0) and available under Aarch64
-//!  (ARM-64) and Aarch32 (ARM-32) running on Aarch64 (i.e., an AArch32 execution environment).
+//! \brief Determine if an ARM processor has AES available
+//! \returns true if the hardware is capable of AES at runtime, false otherwise.
+//! \details AES is part of the Crypto extensions from ARM C Language Extensions 2.0 (ACLE 2.0)
+//!  and available under Aarch64 (ARM-64) and Aarch32 (ARM-32) running on Aarch64 (i.e., an
+//!  AArch32 execution environment).
 //! \details Runtime support requires compile time support. When compiling with GCC, you may
 //!  need to compile with -march=armv8-a+crypto; while Apple requires
 //!  -arch arm64. Also see ARM's __ARM_FEATURE_CRYPTO preprocessor macro.
-inline bool HasCrypto()
+inline bool HasAES()
 {
 	if (!g_ArmDetectionDone)
 		DetectArmFeatures();
-	return g_hasCrypto;
+	return g_hasAES;
+}
+
+//! \brief Determine if an ARM processor has SHA1 available
+//! \returns true if the hardware is capable of SHA1 at runtime, false otherwise.
+//! \details SHA1 is part of the Crypto extensions from ARM C Language Extensions 2.0 (ACLE 2.0)
+//!  and available under Aarch64 (ARM-64) and Aarch32 (ARM-32) running on Aarch64 (i.e., an
+//!  AArch32 execution environment).
+//! \details Runtime support requires compile time support. When compiling with GCC, you may
+//!  need to compile with -march=armv8-a+crypto; while Apple requires
+//!  -arch arm64. Also see ARM's __ARM_FEATURE_CRYPTO preprocessor macro.
+inline bool HasSHA1()
+{
+	if (!g_ArmDetectionDone)
+		DetectArmFeatures();
+	return g_hasSHA1;
+}
+
+//! \brief Determine if an ARM processor has SHA2 available
+//! \returns true if the hardware is capable of SHA2 at runtime, false otherwise.
+//! \details SHA2 is part of the Crypto extensions from ARM C Language Extensions 2.0 (ACLE 2.0)
+//!  and available under Aarch64 (ARM-64) and Aarch32 (ARM-32) running on Aarch64 (i.e., an
+//!  AArch32 execution environment).
+//! \details Runtime support requires compile time support. When compiling with GCC, you may
+//!  need to compile with -march=armv8-a+crypto; while Apple requires
+//!  -arch arm64. Also see ARM's __ARM_FEATURE_CRYPTO preprocessor macro.
+inline bool HasSHA2()
+{
+	if (!g_ArmDetectionDone)
+		DetectArmFeatures();
+	return g_hasSHA2;
 }
 
 //! \brief Provides the cache line size at runtime
diff --git a/validat1.cpp b/validat1.cpp
index 38731bd9..69713c28 100644
--- a/validat1.cpp
+++ b/validat1.cpp
@@ -315,10 +315,12 @@ bool TestSettings()
 #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
 	bool hasNEON = HasNEON();
 	bool hasCRC32 = HasCRC32();
-	bool hasCrypto = HasCrypto();
+	bool hasAES = HasAES();
+	bool hasSHA1 = HasSHA1();
+	bool hasSHA2 = HasSHA2();
 
 	cout << "passed: ";
-	cout << "hasNEON == " << hasNEON << ", hasCRC32 == " << hasCRC32 << ", hasCrypto == " << hasCrypto << endl;
+	cout << "hasNEON == " << hasNEON << ", hasCRC32 == " << hasCRC32 << ", hasAES == " << hasAES << ", hasSHA1 == " << hasSHA1 << ", hasSHA2 == " << hasSHA2 << endl;
 #endif
 
 	if (!pass)