mirror of
https://github.com/shadps4-emu/ext-cryptopp.git
synced 2024-11-23 09:59:42 +00:00
Add AVX and AVX2 runtime feature detection (GH #671)
There are no corresponding defines in config.h at the moment. Programs will have to use the preprocessor macros __AVX__ and __AVX2__ to determine when they are available.
This commit is contained in:
parent
207c6fcb1a
commit
0ccdc197b7
49
cpu.cpp
49
cpu.cpp
@ -21,6 +21,14 @@
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
|
||||
//#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86
|
||||
//# if defined(_MSC_VER)
|
||||
//# include <intrin.h>
|
||||
//# else
|
||||
//# include <immintrin.h>
|
||||
//# endif
|
||||
//#endif
|
||||
|
||||
// Capability queries, requires Glibc 2.16, http://lwn.net/Articles/519085/
|
||||
// CRYPTOPP_GLIBC_VERSION not used because config.h is missing <feature.h>
|
||||
#if (((__GLIBC__ * 100) + __GLIBC_MINOR__) >= 216)
|
||||
@ -187,6 +195,8 @@ bool CRYPTOPP_SECTION_INIT g_hasSSE2 = false;
|
||||
bool CRYPTOPP_SECTION_INIT g_hasSSSE3 = false;
|
||||
bool CRYPTOPP_SECTION_INIT g_hasSSE41 = false;
|
||||
bool CRYPTOPP_SECTION_INIT g_hasSSE42 = false;
|
||||
bool CRYPTOPP_SECTION_INIT g_hasAVX = false;
|
||||
bool CRYPTOPP_SECTION_INIT g_hasAVX2 = false;
|
||||
bool CRYPTOPP_SECTION_INIT g_hasAESNI = false;
|
||||
bool CRYPTOPP_SECTION_INIT g_hasCLMUL = false;
|
||||
bool CRYPTOPP_SECTION_INIT g_hasADX = false;
|
||||
@ -245,12 +255,38 @@ void DetectX86Features()
|
||||
g_hasAESNI = g_hasSSE2 && ((cpuid1[2] & (1<<25)) != 0);
|
||||
g_hasCLMUL = g_hasSSE2 && ((cpuid1[2] & (1<< 1)) != 0);
|
||||
|
||||
// AVX is similar to SSE, but check both bits 27 (SSE) and 28 (AVX).
|
||||
// https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
|
||||
CRYPTOPP_CONSTANT(YMM_FLAG = (3 << 1))
|
||||
CRYPTOPP_CONSTANT(AVX_FLAG = (3 << 27))
|
||||
if ((cpuid1[2] & AVX_FLAG) == AVX_FLAG)
|
||||
{
|
||||
#if defined(__GNUC__)
|
||||
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71659 and
|
||||
// http://www.agner.org/optimize/vectorclass/read.php?i=65
|
||||
word32 a=0, d=0;
|
||||
__asm __volatile
|
||||
(
|
||||
// GCC 4.1/Binutils 2.17 cannot consume xgetbv
|
||||
// "xgetbv" : "=a"(a), "=d"(d) : "c"(0) :
|
||||
".byte 0x0f, 0x01, 0xd0" "\n\t"
|
||||
: "=a"(a), "=d"(d) : "c"(0) :
|
||||
);
|
||||
word64 xcr0 = a | static_cast<word64>(d) << 32;
|
||||
g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG;
|
||||
#else
|
||||
word64 xcr0 = _xgetbv(0);
|
||||
g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (IsIntel(cpuid0))
|
||||
{
|
||||
CRYPTOPP_CONSTANT(RDRAND_FLAG = (1 << 30))
|
||||
CRYPTOPP_CONSTANT(RDSEED_FLAG = (1 << 18))
|
||||
CRYPTOPP_CONSTANT( ADX_FLAG = (1 << 19))
|
||||
CRYPTOPP_CONSTANT( SHA_FLAG = (1 << 29))
|
||||
CRYPTOPP_CONSTANT( AVX2_FLAG = (1 << 5))
|
||||
|
||||
g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf;
|
||||
g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1);
|
||||
@ -263,6 +299,7 @@ void DetectX86Features()
|
||||
g_hasRDSEED = (cpuid2[1] /*EBX*/ & RDSEED_FLAG) != 0;
|
||||
g_hasADX = (cpuid2[1] /*EBX*/ & ADX_FLAG) != 0;
|
||||
g_hasSHA = (cpuid2[1] /*EBX*/ & SHA_FLAG) != 0;
|
||||
g_hasAVX2 = (cpuid2[1] /*EBX*/ & AVX2_FLAG) != 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -272,6 +309,7 @@ void DetectX86Features()
|
||||
CRYPTOPP_CONSTANT(RDSEED_FLAG = (1 << 18))
|
||||
CRYPTOPP_CONSTANT( ADX_FLAG = (1 << 19))
|
||||
CRYPTOPP_CONSTANT( SHA_FLAG = (1 << 29))
|
||||
CRYPTOPP_CONSTANT( AVX2_FLAG = (1 << 5))
|
||||
|
||||
CpuId(0x80000005, 0, cpuid2);
|
||||
g_cacheLineSize = GETBYTE(cpuid2[2], 0);
|
||||
@ -284,6 +322,7 @@ void DetectX86Features()
|
||||
g_hasRDSEED = (cpuid2[1] /*EBX*/ & RDSEED_FLAG) != 0;
|
||||
g_hasADX = (cpuid2[1] /*EBX*/ & ADX_FLAG) != 0;
|
||||
g_hasSHA = (cpuid2[1] /*EBX*/ & SHA_FLAG) != 0;
|
||||
g_hasAVX2 = (cpuid2[1] /*EBX*/ & AVX2_FLAG) != 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -300,11 +339,11 @@ void DetectX86Features()
|
||||
{
|
||||
// Extended features available
|
||||
CpuId(0xC0000001, 0, cpuid2);
|
||||
g_hasPadlockRNG = (cpuid2[3] /*EDX*/ & RNG_FLAGS) != 0;
|
||||
g_hasPadlockACE = (cpuid2[3] /*EDX*/ & ACE_FLAGS) != 0;
|
||||
g_hasPadlockACE2 = (cpuid2[3] /*EDX*/ & ACE2_FLAGS) != 0;
|
||||
g_hasPadlockPHE = (cpuid2[3] /*EDX*/ & PHE_FLAGS) != 0;
|
||||
g_hasPadlockPMM = (cpuid2[3] /*EDX*/ & PMM_FLAGS) != 0;
|
||||
g_hasPadlockRNG = (cpuid2[3] /*EDX*/ & RNG_FLAGS) == RNG_FLAGS;
|
||||
g_hasPadlockACE = (cpuid2[3] /*EDX*/ & ACE_FLAGS) == ACE_FLAGS;
|
||||
g_hasPadlockACE2 = (cpuid2[3] /*EDX*/ & ACE2_FLAGS) == ACE2_FLAGS;
|
||||
g_hasPadlockPHE = (cpuid2[3] /*EDX*/ & PHE_FLAGS) == PHE_FLAGS;
|
||||
g_hasPadlockPMM = (cpuid2[3] /*EDX*/ & PMM_FLAGS) == PMM_FLAGS;
|
||||
}
|
||||
}
|
||||
|
||||
|
30
cpu.h
30
cpu.h
@ -85,6 +85,8 @@ extern CRYPTOPP_DLL bool g_hasSSE2;
|
||||
extern CRYPTOPP_DLL bool g_hasSSSE3;
|
||||
extern CRYPTOPP_DLL bool g_hasSSE41;
|
||||
extern CRYPTOPP_DLL bool g_hasSSE42;
|
||||
extern CRYPTOPP_DLL bool g_hasAVX;
|
||||
extern CRYPTOPP_DLL bool g_hasAVX2;
|
||||
extern CRYPTOPP_DLL bool g_hasAESNI;
|
||||
extern CRYPTOPP_DLL bool g_hasCLMUL;
|
||||
extern CRYPTOPP_DLL bool g_hasSHA;
|
||||
@ -158,6 +160,7 @@ inline bool HasSSE42()
|
||||
/// \brief Determines AES-NI availability
|
||||
/// \returns true if AES-NI is determined to be available, false otherwise
|
||||
/// \details HasAESNI() is a runtime check performed using CPUID
|
||||
/// \since Crypto++ 5.6.1
|
||||
/// \note This function is only available on Intel IA-32 platforms
|
||||
inline bool HasAESNI()
|
||||
{
|
||||
@ -169,6 +172,7 @@ inline bool HasAESNI()
|
||||
/// \brief Determines Carryless Multiply availability
|
||||
/// \returns true if pclmulqdq is determined to be available, false otherwise
|
||||
/// \details HasCLMUL() is a runtime check performed using CPUID
|
||||
/// \since Crypto++ 5.6.1
|
||||
/// \note This function is only available on Intel IA-32 platforms
|
||||
inline bool HasCLMUL()
|
||||
{
|
||||
@ -180,6 +184,7 @@ inline bool HasCLMUL()
|
||||
/// \brief Determines SHA availability
|
||||
/// \returns true if SHA is determined to be available, false otherwise
|
||||
/// \details HasSHA() is a runtime check performed using CPUID
|
||||
/// \since Crypto++ 6.0
|
||||
/// \note This function is only available on Intel IA-32 platforms
|
||||
inline bool HasSHA()
|
||||
{
|
||||
@ -191,6 +196,7 @@ inline bool HasSHA()
|
||||
/// \brief Determines ADX availability
|
||||
/// \returns true if ADX is determined to be available, false otherwise
|
||||
/// \details HasADX() is a runtime check performed using CPUID
|
||||
/// \since Crypto++ 7.0
|
||||
/// \note This function is only available on Intel IA-32 platforms
|
||||
inline bool HasADX()
|
||||
{
|
||||
@ -199,6 +205,30 @@ inline bool HasADX()
|
||||
return g_hasADX;
|
||||
}
|
||||
|
||||
/// \brief Determines AVX availability
|
||||
/// \returns true if AVX is determined to be available, false otherwise
|
||||
/// \details HasAVX() is a runtime check performed using CPUID
|
||||
/// \since Crypto++ 7.1
|
||||
/// \note This function is only available on Intel IA-32 platforms
|
||||
inline bool HasAVX()
|
||||
{
|
||||
if (!g_x86DetectionDone)
|
||||
DetectX86Features();
|
||||
return g_hasAVX;
|
||||
}
|
||||
|
||||
/// \brief Determines AVX2 availability
|
||||
/// \returns true if AVX2 is determined to be available, false otherwise
|
||||
/// \details HasAVX2() is a runtime check performed using CPUID
|
||||
/// \since Crypto++ 7.1
|
||||
/// \note This function is only available on Intel IA-32 platforms
|
||||
inline bool HasAVX2()
|
||||
{
|
||||
if (!g_x86DetectionDone)
|
||||
DetectX86Features();
|
||||
return g_hasAVX2;
|
||||
}
|
||||
|
||||
/// \brief Determines if the CPU is an Intel P4
|
||||
/// \returns true if the CPU is a P4, false otherwise
|
||||
/// \details IsP4() is a runtime check performed using CPUID
|
||||
|
15
validat1.cpp
15
validat1.cpp
@ -369,11 +369,20 @@ bool TestSettings()
|
||||
bool hasSSSE3 = HasSSSE3();
|
||||
bool hasSSE41 = HasSSE41();
|
||||
bool hasSSE42 = HasSSE42();
|
||||
bool hasAVX = HasAVX();
|
||||
bool hasAVX2 = HasAVX2();
|
||||
bool hasAESNI = HasAESNI();
|
||||
bool hasCLMUL = HasCLMUL();
|
||||
bool hasRDRAND = HasRDRAND();
|
||||
bool hasRDSEED = HasRDSEED();
|
||||
bool hasSHA = HasSHA();
|
||||
bool isP4 = IsP4();
|
||||
|
||||
std::cout << "hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSSE3 << ", hasSSE4.1 == " << hasSSE41 << ", hasSSE4.2 == " << hasSSE42;
|
||||
std::cout << ", hasAESNI == " << HasAESNI() << ", hasCLMUL == " << HasCLMUL() << ", hasRDRAND == " << HasRDRAND() << ", hasRDSEED == " << HasRDSEED();
|
||||
std::cout << ", hasSHA == " << HasSHA() << ", isP4 == " << isP4 << "\n";
|
||||
std::cout << "hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSSE3 << ", hasSSE4.1 == " << hasSSE41;
|
||||
std::cout << ", hasSSE4.2 == " << hasSSE42 << ", hasAVX == " << hasAVX << ", hasAVX2 == " << hasAVX2;
|
||||
std::cout << ", hasAESNI == " << hasAESNI << ", hasCLMUL == " << hasCLMUL << ", hasRDRAND == " << HasRDRAND;
|
||||
std::cout << ", hasRDSEED == " << HasRDSEED << ", hasSHA == " << HasSHA() << ", isP4 == " << isP4;
|
||||
std::cout << "\n";
|
||||
|
||||
#elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
|
||||
bool hasNEON = HasNEON();
|
||||
|
Loading…
Reference in New Issue
Block a user