diff --git a/cpu.cpp b/cpu.cpp index 13e15b36..27d5799d 100644 --- a/cpu.cpp +++ b/cpu.cpp @@ -358,6 +358,7 @@ void DetectX86Features() #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) bool CRYPTOPP_SECTION_INIT g_ArmDetectionDone = false; +bool CRYPTOPP_SECTION_INIT g_hasARMv7 = false; bool CRYPTOPP_SECTION_INIT g_hasNEON = false; bool CRYPTOPP_SECTION_INIT g_hasPMULL = false; bool CRYPTOPP_SECTION_INIT g_hasCRC32 = false; @@ -379,6 +380,7 @@ word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; // http://stackoverflow.com/a/11197770/608639 and // http://gist.github.com/erkanyildiz/390a480f27e86f8cd6ba +extern bool CPU_ProbeARMv7(); extern bool CPU_ProbeNEON(); extern bool CPU_ProbeCRC32(); extern bool CPU_ProbeAES(); @@ -386,6 +388,9 @@ extern bool CPU_ProbeSHA1(); extern bool CPU_ProbeSHA2(); extern bool CPU_ProbePMULL(); +#ifndef HWCAP_ARMv7 +# define HWCAP_ARMv7 (1 << 29) +#endif #ifndef HWCAP_ASIMD # define HWCAP_ASIMD (1 << 1) #endif @@ -423,6 +428,25 @@ extern bool CPU_ProbePMULL(); # define HWCAP2_SHA2 (1 << 3) #endif +inline bool CPU_QueryARMv7() +{ +#if defined(__aarch32__) || defined(__aarch64__) + // ARMv7 or above + return true; +#elif defined(__ANDROID__) && defined(__arm__) + if (((android_getCpuFamily() & ANDROID_CPU_FAMILY_ARM) != 0) && + ((android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_ARMv7) != 0)) + return true; +#elif defined(__linux__) && defined(__arm__) + if ((getauxval(AT_HWCAP) & HWCAP_ARMv7) != 0) + return true; +#elif defined(__APPLE__) && defined(__arm__) + // Apple hardware is ARMv7 or above. 
+ return true; +#endif + return false; +} + inline bool CPU_QueryNEON() { #if defined(__ANDROID__) && defined(__aarch64__) @@ -430,7 +454,7 @@ inline bool CPU_QueryNEON() ((android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_ASIMD) != 0)) return true; #elif defined(__ANDROID__) && defined(__arm__) - if (((android_getCpuFamily() & ANDROID_CPU_FAMILY_ARM) != 0) && + if (((android_getCpuFamily() & ANDROID_CPU_FAMILY_ARM) != 0) && ((android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_NEON) != 0)) return true; #elif defined(__linux__) && defined(__aarch64__) @@ -456,7 +480,7 @@ inline bool CPU_QueryCRC32() ((android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_CRC32) != 0)) return true; #elif defined(__ANDROID__) && defined(__aarch32__) - if (((android_getCpuFamily() & ANDROID_CPU_FAMILY_ARM) != 0) && + if (((android_getCpuFamily() & ANDROID_CPU_FAMILY_ARM) != 0) && ((android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_CRC32) != 0)) return true; #elif defined(__linux__) && defined(__aarch64__) @@ -475,7 +499,7 @@ inline bool CPU_QueryCRC32() inline bool CPU_QueryPMULL() { #if defined(__ANDROID__) && defined(__aarch64__) - if (((android_getCpuFamily() & ANDROID_CPU_FAMILY_ARM64) != 0) && + if (((android_getCpuFamily() & ANDROID_CPU_FAMILY_ARM64) != 0) && ((android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_PMULL) != 0)) return true; #elif defined(__ANDROID__) && defined(__aarch32__) @@ -498,11 +522,11 @@ inline bool CPU_QueryPMULL() inline bool CPU_QueryAES() { #if defined(__ANDROID__) && defined(__aarch64__) - if (((android_getCpuFamily() & ANDROID_CPU_FAMILY_ARM64) != 0) && + if (((android_getCpuFamily() & ANDROID_CPU_FAMILY_ARM64) != 0) && ((android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_AES) != 0)) return true; #elif defined(__ANDROID__) && defined(__aarch32__) - if (((android_getCpuFamily() & ANDROID_CPU_FAMILY_ARM) != 0) && + if (((android_getCpuFamily() & ANDROID_CPU_FAMILY_ARM) != 0) && ((android_getCpuFeatures() & ANDROID_CPU_ARM_FEATURE_AES) != 0)) 
return true; #elif defined(__linux__) && defined(__aarch64__) @@ -568,7 +592,7 @@ inline bool CPU_QuerySHA1() inline bool CPU_QuerySHA2() { #if defined(__ANDROID__) && defined(__aarch64__) - if (((android_getCpuFamily() & ANDROID_CPU_FAMILY_ARM64) != 0) && + if (((android_getCpuFamily() & ANDROID_CPU_FAMILY_ARM64) != 0) && ((android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_SHA2) != 0)) return true; #elif defined(__ANDROID__) && defined(__aarch32__) @@ -604,7 +628,8 @@ void DetectArmFeatures() { // The CPU_ProbeXXX's return false for OSes which // can't tolerate SIGILL-based probes - g_hasNEON = CPU_QueryNEON() || CPU_ProbeNEON(); + g_hasARMv7 = CPU_QueryARMv7() || CPU_ProbeARMv7(); + g_hasNEON = CPU_QueryNEON() || CPU_ProbeNEON(); g_hasCRC32 = CPU_QueryCRC32() || CPU_ProbeCRC32(); g_hasPMULL = CPU_QueryPMULL() || CPU_ProbePMULL(); g_hasAES = CPU_QueryAES() || CPU_ProbeAES(); diff --git a/cpu.h b/cpu.h index 0d555b4f..577b1ab0 100644 --- a/cpu.h +++ b/cpu.h @@ -342,13 +342,29 @@ inline int GetCacheLineSize() // Hide from Doxygen #ifndef CRYPTOPP_DOXYGEN_PROCESSING extern bool g_ArmDetectionDone; -extern bool g_hasNEON, g_hasPMULL, g_hasCRC32, g_hasAES, g_hasSHA1, g_hasSHA2; +extern bool g_hasARMv7, g_hasNEON, g_hasPMULL, g_hasCRC32, g_hasAES, g_hasSHA1, g_hasSHA2; void CRYPTOPP_API DetectArmFeatures(); #endif // CRYPTOPP_DOXYGEN_PROCESSING /// \name ARM A-32, Aarch32 and AArch64 CPU FEATURES //@{ +/// \brief Determine if an ARM processor is ARMv7 or above +/// \returns true if the hardware is ARMv7 or above, false otherwise. 
+/// \details Some AES code requires ARMv7 or above +/// \note This function is only available on ARM-32, Aarch32 and Aarch64 platforms +inline bool HasARMv7() +{ + // ARMv7 is the architectural baseline on Aarch32 and Aarch64, so no runtime check is needed +#if defined(__aarch32__) || defined(__aarch64__) + return true; +#else + if (!g_ArmDetectionDone) + DetectArmFeatures(); + return g_hasARMv7; +#endif +} + /// \brief Determine if an ARM processor has Advanced SIMD available /// \returns true if the hardware is capable of Advanced SIMD at runtime, false otherwise. /// \details Advanced SIMD instructions are available under most ARMv7, Aarch32 and Aarch64. diff --git a/neon-simd.cpp b/neon-simd.cpp index 72d86fc0..dcc4d3a0 100644 --- a/neon-simd.cpp +++ b/neon-simd.cpp @@ -47,6 +47,58 @@ extern "C" { } #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY +bool CPU_ProbeARMv7() +{ +#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES) + return false; +#elif (CRYPTOPP_ARM_NEON_AVAILABLE) +# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) + volatile bool result = true; + __try + { + // Modern MS hardware is ARMv7 + result = true; + } + __except (EXCEPTION_EXECUTE_HANDLER) + { + return false; + } + return result; +# else + + // longjmp and clobber warnings. Volatile is required. 
+ // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 + volatile bool result = true; + + volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler); + if (oldHandler == SIG_ERR) + return false; + + volatile sigset_t oldMask; + if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask)) + return false; + + if (setjmp(s_jmpSIGILL)) + result = false; + else + { + // ARMv7 added movt and movw + int a; + asm volatile("movw %0,%1 \n" + "movt %0,%1 \n" + : "=r"(a) : "i"(0x1234)); + result = (a == 0x12341234); + } + + sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR); + signal(SIGILL, oldHandler); + return result; +# endif +#else + return false; +#endif // CRYPTOPP_ARM_NEON_AVAILABLE +} + bool CPU_ProbeNEON() { #if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES) diff --git a/validat1.cpp b/validat1.cpp index 81c40d17..16b5fa92 100644 --- a/validat1.cpp +++ b/validat1.cpp @@ -383,13 +383,16 @@ bool TestSettings() bool hasSHA = HasSHA(); bool isP4 = IsP4(); - std::cout << "hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSSE3 << ", hasSSE4.1 == " << hasSSE41; - std::cout << ", hasSSE4.2 == " << hasSSE42 << ", hasAVX == " << hasAVX << ", hasAVX2 == " << hasAVX2; - std::cout << ", hasAESNI == " << hasAESNI << ", hasCLMUL == " << hasCLMUL << ", hasRDRAND == " << hasRDRAND; - std::cout << ", hasRDSEED == " << hasRDSEED << ", hasSHA == " << hasSHA << ", isP4 == " << isP4; + std::cout << "hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSSE3; + std::cout << ", hasSSE4.1 == " << hasSSE41 << ", hasSSE4.2 == " << hasSSE42; + std::cout << ", hasAVX == " << hasAVX << ", hasAVX2 == " << hasAVX2; + std::cout << ", hasAESNI == " << hasAESNI << ", hasCLMUL == " << hasCLMUL; + std::cout << ", hasRDRAND == " << hasRDRAND << ", hasRDSEED == " << hasRDSEED; + std::cout << ", hasSHA == " << hasSHA << ", isP4 == " << isP4; std::cout << "\n"; #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) + bool hasARMv7 = HasARMv7(); bool hasNEON = HasNEON(); bool hasCRC32 = 
HasCRC32(); bool hasPMULL = HasPMULL(); @@ -398,8 +401,10 @@ bool TestSettings() bool hasSHA2 = HasSHA2(); std::cout << "passed: "; - std::cout << "hasNEON == " << hasNEON << ", hasCRC32 == " << hasCRC32 << ", hasPMULL == " << hasPMULL; - std::cout << ", hasAES == " << hasAES << ", hasSHA1 == " << hasSHA1 << ", hasSHA2 == " << hasSHA2 << "\n"; + std::cout << "hasARMv7 == " << hasARMv7 << ", hasNEON == " << hasNEON; + std::cout << ", hasCRC32 == " << hasCRC32 << ", hasPMULL == " << hasPMULL; + std::cout << ", hasAES == " << hasAES << ", hasSHA1 == " << hasSHA1; + std::cout << ", hasSHA2 == " << hasSHA2 << "\n"; #elif (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64) const bool hasAltivec = HasAltivec(); @@ -410,8 +415,9 @@ bool TestSettings() const bool hasSHA512 = HasSHA512(); std::cout << "passed: "; - std::cout << "hasAltivec == " << hasAltivec << ", hasPower7 == " << hasPower7 << ", hasPower8 == " << hasPower8; - std::cout << ", hasAES == " << hasAES << ", hasSHA256 == " << hasSHA256 << ", hasSHA512 == " << hasSHA512 << "\n"; + std::cout << "hasAltivec == " << hasAltivec << ", hasPower7 == " << hasPower7; + std::cout << ", hasPower8 == " << hasPower8 << ", hasAES == " << hasAES; + std::cout << ", hasSHA256 == " << hasSHA256 << ", hasSHA512 == " << hasSHA512 << "\n"; #endif