Add AVX and AVX2 runtime feature detection (GH #671)

There are no corresponding defines in config.h at the moment. Programs will have to use the preprocessor macros __AVX__ and __AVX2__ to determine when they are available.
This commit is contained in:
Jeffrey Walton 2018-06-18 18:33:17 -04:00
parent 207c6fcb1a
commit 0ccdc197b7
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
3 changed files with 86 additions and 8 deletions

49
cpu.cpp
View File

@ -21,6 +21,14 @@
# include <unistd.h>
#endif
//#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86
//# if defined(_MSC_VER)
//# include <intrin.h>
//# else
//# include <immintrin.h>
//# endif
//#endif
// Capability queries, requires Glibc 2.16, http://lwn.net/Articles/519085/
// CRYPTOPP_GLIBC_VERSION not used because config.h is missing <feature.h>
#if (((__GLIBC__ * 100) + __GLIBC_MINOR__) >= 216)
@ -187,6 +195,8 @@ bool CRYPTOPP_SECTION_INIT g_hasSSE2 = false;
bool CRYPTOPP_SECTION_INIT g_hasSSSE3 = false;
bool CRYPTOPP_SECTION_INIT g_hasSSE41 = false;
bool CRYPTOPP_SECTION_INIT g_hasSSE42 = false;
bool CRYPTOPP_SECTION_INIT g_hasAVX = false;
bool CRYPTOPP_SECTION_INIT g_hasAVX2 = false;
bool CRYPTOPP_SECTION_INIT g_hasAESNI = false;
bool CRYPTOPP_SECTION_INIT g_hasCLMUL = false;
bool CRYPTOPP_SECTION_INIT g_hasADX = false;
@ -245,12 +255,38 @@ void DetectX86Features()
g_hasAESNI = g_hasSSE2 && ((cpuid1[2] & (1<<25)) != 0);
g_hasCLMUL = g_hasSSE2 && ((cpuid1[2] & (1<< 1)) != 0);
// AVX is similar to SSE, but check both bits 27 (SSE) and 28 (AVX).
// https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
CRYPTOPP_CONSTANT(YMM_FLAG = (3 << 1))
CRYPTOPP_CONSTANT(AVX_FLAG = (3 << 27))
if ((cpuid1[2] & AVX_FLAG) == AVX_FLAG)
{
#if defined(__GNUC__)
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71659 and
// http://www.agner.org/optimize/vectorclass/read.php?i=65
word32 a=0, d=0;
__asm __volatile
(
// GCC 4.1/Binutils 2.17 cannot consume xgetbv
// "xgetbv" : "=a"(a), "=d"(d) : "c"(0) :
".byte 0x0f, 0x01, 0xd0" "\n\t"
: "=a"(a), "=d"(d) : "c"(0) :
);
word64 xcr0 = a | static_cast<word64>(d) << 32;
g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG;
#else
word64 xcr0 = _xgetbv(0);
g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG;
#endif
}
if (IsIntel(cpuid0))
{
CRYPTOPP_CONSTANT(RDRAND_FLAG = (1 << 30))
CRYPTOPP_CONSTANT(RDSEED_FLAG = (1 << 18))
CRYPTOPP_CONSTANT( ADX_FLAG = (1 << 19))
CRYPTOPP_CONSTANT( SHA_FLAG = (1 << 29))
CRYPTOPP_CONSTANT( AVX2_FLAG = (1 << 5))
g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf;
g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1);
@ -263,6 +299,7 @@ void DetectX86Features()
g_hasRDSEED = (cpuid2[1] /*EBX*/ & RDSEED_FLAG) != 0;
g_hasADX = (cpuid2[1] /*EBX*/ & ADX_FLAG) != 0;
g_hasSHA = (cpuid2[1] /*EBX*/ & SHA_FLAG) != 0;
g_hasAVX2 = (cpuid2[1] /*EBX*/ & AVX2_FLAG) != 0;
}
}
}
@ -272,6 +309,7 @@ void DetectX86Features()
CRYPTOPP_CONSTANT(RDSEED_FLAG = (1 << 18))
CRYPTOPP_CONSTANT( ADX_FLAG = (1 << 19))
CRYPTOPP_CONSTANT( SHA_FLAG = (1 << 29))
CRYPTOPP_CONSTANT( AVX2_FLAG = (1 << 5))
CpuId(0x80000005, 0, cpuid2);
g_cacheLineSize = GETBYTE(cpuid2[2], 0);
@ -284,6 +322,7 @@ void DetectX86Features()
g_hasRDSEED = (cpuid2[1] /*EBX*/ & RDSEED_FLAG) != 0;
g_hasADX = (cpuid2[1] /*EBX*/ & ADX_FLAG) != 0;
g_hasSHA = (cpuid2[1] /*EBX*/ & SHA_FLAG) != 0;
g_hasAVX2 = (cpuid2[1] /*EBX*/ & AVX2_FLAG) != 0;
}
}
}
@ -300,11 +339,11 @@ void DetectX86Features()
{
// Extended features available
CpuId(0xC0000001, 0, cpuid2);
g_hasPadlockRNG = (cpuid2[3] /*EDX*/ & RNG_FLAGS) != 0;
g_hasPadlockACE = (cpuid2[3] /*EDX*/ & ACE_FLAGS) != 0;
g_hasPadlockACE2 = (cpuid2[3] /*EDX*/ & ACE2_FLAGS) != 0;
g_hasPadlockPHE = (cpuid2[3] /*EDX*/ & PHE_FLAGS) != 0;
g_hasPadlockPMM = (cpuid2[3] /*EDX*/ & PMM_FLAGS) != 0;
g_hasPadlockRNG = (cpuid2[3] /*EDX*/ & RNG_FLAGS) == RNG_FLAGS;
g_hasPadlockACE = (cpuid2[3] /*EDX*/ & ACE_FLAGS) == ACE_FLAGS;
g_hasPadlockACE2 = (cpuid2[3] /*EDX*/ & ACE2_FLAGS) == ACE2_FLAGS;
g_hasPadlockPHE = (cpuid2[3] /*EDX*/ & PHE_FLAGS) == PHE_FLAGS;
g_hasPadlockPMM = (cpuid2[3] /*EDX*/ & PMM_FLAGS) == PMM_FLAGS;
}
}

30
cpu.h
View File

@ -85,6 +85,8 @@ extern CRYPTOPP_DLL bool g_hasSSE2;
extern CRYPTOPP_DLL bool g_hasSSSE3;
extern CRYPTOPP_DLL bool g_hasSSE41;
extern CRYPTOPP_DLL bool g_hasSSE42;
extern CRYPTOPP_DLL bool g_hasAVX;
extern CRYPTOPP_DLL bool g_hasAVX2;
extern CRYPTOPP_DLL bool g_hasAESNI;
extern CRYPTOPP_DLL bool g_hasCLMUL;
extern CRYPTOPP_DLL bool g_hasSHA;
@ -158,6 +160,7 @@ inline bool HasSSE42()
/// \brief Determines AES-NI availability
/// \returns true if AES-NI is determined to be available, false otherwise
/// \details HasAESNI() is a runtime check performed using CPUID
/// \since Crypto++ 5.6.1
/// \note This function is only available on Intel IA-32 platforms
inline bool HasAESNI()
{
@ -169,6 +172,7 @@ inline bool HasAESNI()
/// \brief Determines Carryless Multiply availability
/// \returns true if pclmulqdq is determined to be available, false otherwise
/// \details HasCLMUL() is a runtime check performed using CPUID
/// \since Crypto++ 5.6.1
/// \note This function is only available on Intel IA-32 platforms
inline bool HasCLMUL()
{
@ -180,6 +184,7 @@ inline bool HasCLMUL()
/// \brief Determines SHA availability
/// \returns true if SHA is determined to be available, false otherwise
/// \details HasSHA() is a runtime check performed using CPUID
/// \since Crypto++ 6.0
/// \note This function is only available on Intel IA-32 platforms
inline bool HasSHA()
{
@ -191,6 +196,7 @@ inline bool HasSHA()
/// \brief Determines ADX availability
/// \returns true if ADX is determined to be available, false otherwise
/// \details HasADX() is a runtime check performed using CPUID
/// \since Crypto++ 7.0
/// \note This function is only available on Intel IA-32 platforms
inline bool HasADX()
{
@ -199,6 +205,30 @@ inline bool HasADX()
return g_hasADX;
}
/// \brief Determines AVX availability
/// \returns true if AVX is determined to be available, false otherwise
/// \details HasAVX() is a runtime check performed using CPUID
/// \since Crypto++ 7.1
/// \note This function is only available on Intel IA-32 platforms
inline bool HasAVX()
{
if (!g_x86DetectionDone)
DetectX86Features();
return g_hasAVX;
}
/// \brief Determines AVX2 availability
/// \returns true if AVX2 is determined to be available, false otherwise
/// \details HasAVX2() is a runtime check performed using CPUID
/// \since Crypto++ 7.1
/// \note This function is only available on Intel IA-32 platforms
inline bool HasAVX2()
{
if (!g_x86DetectionDone)
DetectX86Features();
return g_hasAVX2;
}
/// \brief Determines if the CPU is an Intel P4
/// \returns true if the CPU is a P4, false otherwise
/// \details IsP4() is a runtime check performed using CPUID

View File

@ -369,11 +369,20 @@ bool TestSettings()
bool hasSSSE3 = HasSSSE3();
bool hasSSE41 = HasSSE41();
bool hasSSE42 = HasSSE42();
bool hasAVX = HasAVX();
bool hasAVX2 = HasAVX2();
bool hasAESNI = HasAESNI();
bool hasCLMUL = HasCLMUL();
bool hasRDRAND = HasRDRAND();
bool hasRDSEED = HasRDSEED();
bool hasSHA = HasSHA();
bool isP4 = IsP4();
std::cout << "hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSSE3 << ", hasSSE4.1 == " << hasSSE41 << ", hasSSE4.2 == " << hasSSE42;
std::cout << ", hasAESNI == " << HasAESNI() << ", hasCLMUL == " << HasCLMUL() << ", hasRDRAND == " << HasRDRAND() << ", hasRDSEED == " << HasRDSEED();
std::cout << ", hasSHA == " << HasSHA() << ", isP4 == " << isP4 << "\n";
std::cout << "hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSSE3 << ", hasSSE4.1 == " << hasSSE41;
std::cout << ", hasSSE4.2 == " << hasSSE42 << ", hasAVX == " << hasAVX << ", hasAVX2 == " << hasAVX2;
std::cout << ", hasAESNI == " << hasAESNI << ", hasCLMUL == " << hasCLMUL << ", hasRDRAND == " << HasRDRAND;
std::cout << ", hasRDSEED == " << HasRDSEED << ", hasSHA == " << HasSHA() << ", isP4 == " << isP4;
std::cout << "\n";
#elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
bool hasNEON = HasNEON();