mirror of
https://github.com/shadps4-emu/ext-cryptopp.git
synced 2024-11-27 03:40:22 +00:00
bf9809ef57
* Fixed by Jeffrey Walton (noloader)
764 lines
21 KiB
C++
764 lines
21 KiB
C++
// cpu.cpp - written and placed in the public domain by Wei Dai
|
|
|
|
#include "pch.h"
|
|
#include "config.h"
|
|
|
|
#ifndef EXCEPTION_EXECUTE_HANDLER
|
|
# define EXCEPTION_EXECUTE_HANDLER 1
|
|
#endif
|
|
|
|
#ifndef CRYPTOPP_IMPORTS
|
|
|
|
#include "cpu.h"
|
|
#include "misc.h"
|
|
#include <algorithm>
|
|
|
|
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
|
#include <signal.h>
|
|
#include <setjmp.h>
|
|
#endif
|
|
|
|
#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
|
|
#include <emmintrin.h>
|
|
#endif
|
|
|
|
NAMESPACE_BEGIN(CryptoPP)
|
|
|
|
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
|
|
|
// MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it.
|
|
#define HAVE_GCC_CONSTRUCTOR1 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && ((CRYPTOPP_GCC_VERSION >= 40300) || (CRYPTOPP_LLVM_CLANG_VERSION >= 20900) || (_INTEL_COMPILER >= 300)) && !(MACPORTS_GCC_COMPILER > 0))
|
|
#define HAVE_GCC_CONSTRUCTOR0 (__GNUC__ && (CRYPTOPP_INIT_PRIORITY > 0) && !(MACPORTS_GCC_COMPILER > 0))
|
|
|
|
extern "C" {
|
|
typedef void (*SigHandler)(int);
|
|
};
|
|
#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
|
|
|
#ifdef CRYPTOPP_CPUID_AVAILABLE
|
|
|
|
#if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64
|
|
|
|
bool CpuId(word32 input, word32 output[4])
|
|
{
|
|
__cpuid((int *)output, input);
|
|
return true;
|
|
}
|
|
|
|
#else
|
|
|
|
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
|
extern "C"
|
|
{
|
|
static jmp_buf s_jmpNoCPUID;
|
|
static void SigIllHandlerCPUID(int)
|
|
{
|
|
longjmp(s_jmpNoCPUID, 1);
|
|
}
|
|
|
|
static jmp_buf s_jmpNoSSE2;
|
|
static void SigIllHandlerSSE2(int)
|
|
{
|
|
longjmp(s_jmpNoSSE2, 1);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
bool CpuId(word32 input, word32 output[4])
|
|
{
|
|
#if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
|
__try
|
|
{
|
|
__asm
|
|
{
|
|
mov eax, input
|
|
mov ecx, 0
|
|
cpuid
|
|
mov edi, output
|
|
mov [edi], eax
|
|
mov [edi+4], ebx
|
|
mov [edi+8], ecx
|
|
mov [edi+12], edx
|
|
}
|
|
}
|
|
// GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
|
|
__except (EXCEPTION_EXECUTE_HANDLER)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
// function 0 returns the highest basic function understood in EAX
|
|
if(input == 0)
|
|
return !!output[0];
|
|
|
|
return true;
|
|
#else
|
|
// longjmp and clobber warnings. Volatile is required.
|
|
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
|
volatile bool result = true;
|
|
|
|
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID);
|
|
if (oldHandler == SIG_ERR)
|
|
return false;
|
|
|
|
# ifndef __MINGW32__
|
|
volatile sigset_t oldMask;
|
|
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
|
|
return false;
|
|
# endif
|
|
|
|
if (setjmp(s_jmpNoCPUID))
|
|
result = false;
|
|
else
|
|
{
|
|
asm volatile
|
|
(
|
|
// save ebx in case -fPIC is being used
|
|
// TODO: this might need an early clobber on EDI.
|
|
# if CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
|
|
"pushq %%rbx; cpuid; mov %%ebx, %%edi; popq %%rbx"
|
|
# else
|
|
"push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
|
|
# endif
|
|
: "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
|
|
: "a" (input), "c" (0)
|
|
);
|
|
}
|
|
|
|
# ifndef __MINGW32__
|
|
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
|
|
# endif
|
|
|
|
signal(SIGILL, oldHandler);
|
|
return result;
|
|
#endif
|
|
}
|
|
|
|
#endif
|
|
|
|
static bool TrySSE2()
|
|
{
|
|
#if CRYPTOPP_BOOL_X64
|
|
return true;
|
|
#elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
|
__try
|
|
{
|
|
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
|
|
AS2(por xmm0, xmm0) // executing SSE2 instruction
|
|
#elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
|
|
__m128i x = _mm_setzero_si128();
|
|
return _mm_cvtsi128_si32(x) == 0;
|
|
#endif
|
|
}
|
|
// GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
|
|
__except (EXCEPTION_EXECUTE_HANDLER)
|
|
{
|
|
return false;
|
|
}
|
|
return true;
|
|
#else
|
|
// longjmp and clobber warnings. Volatile is required.
|
|
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
|
volatile bool result = true;
|
|
|
|
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2);
|
|
if (oldHandler == SIG_ERR)
|
|
return false;
|
|
|
|
# ifndef __MINGW32__
|
|
volatile sigset_t oldMask;
|
|
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
|
|
return false;
|
|
# endif
|
|
|
|
if (setjmp(s_jmpNoSSE2))
|
|
result = false;
|
|
else
|
|
{
|
|
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
|
|
__asm __volatile ("por %xmm0, %xmm0");
|
|
#elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
|
|
__m128i x = _mm_setzero_si128();
|
|
result = _mm_cvtsi128_si32(x) == 0;
|
|
#endif
|
|
}
|
|
|
|
# ifndef __MINGW32__
|
|
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
|
|
# endif
|
|
|
|
signal(SIGILL, oldHandler);
|
|
return result;
|
|
#endif
|
|
}
|
|
|
|
bool CRYPTOPP_SECTION_INIT g_x86DetectionDone = false;
|
|
bool CRYPTOPP_SECTION_INIT g_hasMMX = false, CRYPTOPP_SECTION_INIT g_hasISSE = false, CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false;
|
|
bool CRYPTOPP_SECTION_INIT g_hasSSE4 = false, CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false, CRYPTOPP_SECTION_INIT g_isP4 = false;
|
|
bool CRYPTOPP_SECTION_INIT g_hasRDRAND = false, CRYPTOPP_SECTION_INIT g_hasRDSEED = false;
|
|
bool CRYPTOPP_SECTION_INIT g_hasPadlockRNG = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE2 = false;
|
|
bool CRYPTOPP_SECTION_INIT g_hasPadlockPHE = false, CRYPTOPP_SECTION_INIT g_hasPadlockPMM = false;
|
|
word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
|
|
|
|
static inline bool IsIntel(const word32 output[4])
|
|
{
|
|
// This is the "GenuineIntel" string
|
|
return (output[1] /*EBX*/ == 0x756e6547) &&
|
|
(output[2] /*ECX*/ == 0x6c65746e) &&
|
|
(output[3] /*EDX*/ == 0x49656e69);
|
|
}
|
|
|
|
static inline bool IsAMD(const word32 output[4])
|
|
{
|
|
// This is the "AuthenticAMD" string. Some early K5's can return "AMDisbetter!"
|
|
return (output[1] /*EBX*/ == 0x68747541) &&
|
|
(output[2] /*ECX*/ == 0x444D4163) &&
|
|
(output[3] /*EDX*/ == 0x69746E65);
|
|
}
|
|
|
|
static inline bool IsVIA(const word32 output[4])
|
|
{
|
|
// This is the "CentaurHauls" string. Some non-PadLock's can return "VIA VIA VIA "
|
|
return (output[1] /*EBX*/ == 0x746e6543) &&
|
|
(output[2] /*ECX*/ == 0x736c7561) &&
|
|
(output[3] /*EDX*/ == 0x48727561);
|
|
}
|
|
|
|
#if HAVE_GCC_CONSTRUCTOR1
|
|
void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectX86Features()
|
|
#elif HAVE_GCC_CONSTRUCTOR0
|
|
void __attribute__ ((constructor)) DetectX86Features()
|
|
#else
|
|
void DetectX86Features()
|
|
#endif
|
|
{
|
|
word32 cpuid[4], cpuid1[4];
|
|
if (!CpuId(0, cpuid))
|
|
return;
|
|
if (!CpuId(1, cpuid1))
|
|
return;
|
|
|
|
g_hasMMX = (cpuid1[3] & (1 << 23)) != 0;
|
|
if ((cpuid1[3] & (1 << 26)) != 0)
|
|
g_hasSSE2 = TrySSE2();
|
|
g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9));
|
|
g_hasSSE4 = g_hasSSE2 && ((cpuid1[2] & (1<<19)) && (cpuid1[2] & (1<<20)));
|
|
g_hasAESNI = g_hasSSE2 && (cpuid1[2] & (1<<25));
|
|
g_hasCLMUL = g_hasSSE2 && (cpuid1[2] & (1<<1));
|
|
|
|
if ((cpuid1[3] & (1 << 25)) != 0)
|
|
g_hasISSE = true;
|
|
else
|
|
{
|
|
word32 cpuid2[4];
|
|
CpuId(0x080000000, cpuid2);
|
|
if (cpuid2[0] >= 0x080000001)
|
|
{
|
|
CpuId(0x080000001, cpuid2);
|
|
g_hasISSE = (cpuid2[3] & (1 << 22)) != 0;
|
|
}
|
|
}
|
|
|
|
if (IsIntel(cpuid))
|
|
{
|
|
static const unsigned int RDRAND_FLAG = (1 << 30);
|
|
static const unsigned int RDSEED_FLAG = (1 << 18);
|
|
|
|
g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf;
|
|
g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1);
|
|
g_hasRDRAND = !!(cpuid1[2] /*ECX*/ & RDRAND_FLAG);
|
|
|
|
if (cpuid[0] /*EAX*/ >= 7)
|
|
{
|
|
word32 cpuid3[4];
|
|
if (CpuId(7, cpuid3))
|
|
g_hasRDSEED = !!(cpuid3[1] /*EBX*/ & RDSEED_FLAG);
|
|
}
|
|
}
|
|
else if (IsAMD(cpuid))
|
|
{
|
|
static const unsigned int RDRAND_FLAG = (1 << 30);
|
|
|
|
CpuId(0x01, cpuid);
|
|
g_hasRDRAND = !!(cpuid[2] /*ECX*/ & RDRAND_FLAG);
|
|
|
|
CpuId(0x80000005, cpuid);
|
|
g_cacheLineSize = GETBYTE(cpuid[2], 0);
|
|
}
|
|
else if (IsVIA(cpuid))
|
|
{
|
|
static const unsigned int RNG_FLAGS = (0x3 << 2);
|
|
static const unsigned int ACE_FLAGS = (0x3 << 6);
|
|
static const unsigned int ACE2_FLAGS = (0x3 << 8);
|
|
static const unsigned int PHE_FLAGS = (0x3 << 10);
|
|
static const unsigned int PMM_FLAGS = (0x3 << 12);
|
|
|
|
CpuId(0xC0000000, cpuid);
|
|
if (cpuid[0] >= 0xC0000001)
|
|
{
|
|
// Extended features available
|
|
CpuId(0xC0000001, cpuid);
|
|
g_hasPadlockRNG = !!(cpuid[3] /*EDX*/ & RNG_FLAGS);
|
|
g_hasPadlockACE = !!(cpuid[3] /*EDX*/ & ACE_FLAGS);
|
|
g_hasPadlockACE2 = !!(cpuid[3] /*EDX*/ & ACE2_FLAGS);
|
|
g_hasPadlockPHE = !!(cpuid[3] /*EDX*/ & PHE_FLAGS);
|
|
g_hasPadlockPMM = !!(cpuid[3] /*EDX*/ & PMM_FLAGS);
|
|
}
|
|
}
|
|
|
|
if (!g_cacheLineSize)
|
|
g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
|
|
|
|
*((volatile bool*)&g_x86DetectionDone) = true;
|
|
}
|
|
|
|
#elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
|
|
|
|
// The ARM equivalent of CPUID probing is reading a MSR. The code requires Exception Level 1 (EL1) and above, but user space runs at EL0.
|
|
// Attempting to run the code results in a SIGILL and termination.
|
|
//
|
|
// #if defined(__arm64__) || defined(__aarch64__)
|
|
// word64 caps = 0; // Read ID_AA64ISAR0_EL1
|
|
// __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps));
|
|
// #elif defined(__arm__) || defined(__aarch32__)
|
|
// word32 caps = 0; // Read ID_ISAR5_EL1
|
|
// __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps));
|
|
// #endif
|
|
//
|
|
// The following does not work well either. Its appears to be missing constants, and it does not detect Aarch32 execution environments on Aarch64
|
|
// http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
|
|
//
|
|
bool CRYPTOPP_SECTION_INIT g_ArmDetectionDone = false;
|
|
bool CRYPTOPP_SECTION_INIT g_hasNEON = false, CRYPTOPP_SECTION_INIT g_hasPMULL = false, CRYPTOPP_SECTION_INIT g_hasCRC32 = false;
|
|
bool CRYPTOPP_SECTION_INIT g_hasAES = false, CRYPTOPP_SECTION_INIT g_hasSHA1 = false, CRYPTOPP_SECTION_INIT g_hasSHA2 = false;
|
|
word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
|
|
|
|
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
|
extern "C"
|
|
{
|
|
static jmp_buf s_jmpNoNEON;
|
|
static void SigIllHandlerNEON(int)
|
|
{
|
|
longjmp(s_jmpNoNEON, 1);
|
|
}
|
|
|
|
static jmp_buf s_jmpNoPMULL;
|
|
static void SigIllHandlerPMULL(int)
|
|
{
|
|
longjmp(s_jmpNoPMULL, 1);
|
|
}
|
|
|
|
static jmp_buf s_jmpNoCRC32;
|
|
static void SigIllHandlerCRC32(int)
|
|
{
|
|
longjmp(s_jmpNoCRC32, 1);
|
|
}
|
|
|
|
static jmp_buf s_jmpNoAES;
|
|
static void SigIllHandlerAES(int)
|
|
{
|
|
longjmp(s_jmpNoAES, 1);
|
|
}
|
|
|
|
static jmp_buf s_jmpNoSHA1;
|
|
static void SigIllHandlerSHA1(int)
|
|
{
|
|
longjmp(s_jmpNoSHA1, 1);
|
|
}
|
|
|
|
static jmp_buf s_jmpNoSHA2;
|
|
static void SigIllHandlerSHA2(int)
|
|
{
|
|
longjmp(s_jmpNoSHA2, 1);
|
|
}
|
|
};
|
|
#endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
|
|
|
|
static bool TryNEON()
|
|
{
|
|
#if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
|
|
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
|
volatile bool result = true;
|
|
__try
|
|
{
|
|
uint32_t v1[4] = {1,1,1,1};
|
|
uint32x4_t x1 = vld1q_u32(v1);
|
|
uint64_t v2[2] = {1,1};
|
|
uint64x2_t x2 = vld1q_u64(v2);
|
|
|
|
uint32x4_t x3 = vdupq_n_u32(2);
|
|
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
|
|
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
|
|
uint64x2_t x4 = vdupq_n_u64(2);
|
|
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
|
|
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);
|
|
|
|
result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
|
|
}
|
|
__except (EXCEPTION_EXECUTE_HANDLER)
|
|
{
|
|
return false;
|
|
}
|
|
return result;
|
|
# else
|
|
// longjmp and clobber warnings. Volatile is required.
|
|
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
|
volatile bool result = true;
|
|
|
|
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
|
|
if (oldHandler == SIG_ERR)
|
|
return false;
|
|
|
|
volatile sigset_t oldMask;
|
|
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
|
|
return false;
|
|
|
|
if (setjmp(s_jmpNoNEON))
|
|
result = false;
|
|
else
|
|
{
|
|
uint32_t v1[4] = {1,1,1,1};
|
|
uint32x4_t x1 = vld1q_u32(v1);
|
|
uint64_t v2[2] = {1,1};
|
|
uint64x2_t x2 = vld1q_u64(v2);
|
|
|
|
uint32x4_t x3 = {0,0,0,0};
|
|
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
|
|
x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
|
|
uint64x2_t x4 = {0,0};
|
|
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
|
|
x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);
|
|
|
|
// Hack... GCC optimizes away the code and returns true
|
|
result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
|
|
}
|
|
|
|
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
|
|
signal(SIGILL, oldHandler);
|
|
return result;
|
|
# endif
|
|
#else
|
|
return false;
|
|
#endif // CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
|
|
}
|
|
|
|
static bool TryPMULL()
|
|
{
|
|
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
|
|
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
|
volatile bool result = true;
|
|
__try
|
|
{
|
|
const poly64_t a1={2}, b1={3};
|
|
const poly64x2_t a2={4,5}, b2={6,7};
|
|
const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0};
|
|
|
|
const poly128_t r1 = vmull_p64(a1, b1);
|
|
const poly128_t r2 = vmull_high_p64(a2, b2);
|
|
const poly128_t r3 = vmull_high_p64(a3, b3);
|
|
|
|
// Also see https://github.com/weidai11/cryptopp/issues/233.
|
|
const uint64x2_t& t1 = vreinterpretq_u64_p128(r1); // {6,0}
|
|
const uint64x2_t& t2 = vreinterpretq_u64_p128(r2); // {24,0}
|
|
const uint64x2_t& t3 = vreinterpretq_u64_p128(r3); // {bignum,bignum}
|
|
|
|
result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b &&
|
|
vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00);
|
|
}
|
|
__except (EXCEPTION_EXECUTE_HANDLER)
|
|
{
|
|
return false;
|
|
}
|
|
return result;
|
|
# else
|
|
// longjmp and clobber warnings. Volatile is required.
|
|
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
|
volatile bool result = true;
|
|
|
|
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL);
|
|
if (oldHandler == SIG_ERR)
|
|
return false;
|
|
|
|
volatile sigset_t oldMask;
|
|
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
|
|
return false;
|
|
|
|
if (setjmp(s_jmpNoPMULL))
|
|
result = false;
|
|
else
|
|
{
|
|
const poly64_t a1={2}, b1={3};
|
|
const poly64x2_t a2={4,5}, b2={6,7};
|
|
const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0};
|
|
|
|
const poly128_t r1 = vmull_p64(a1, b1);
|
|
const poly128_t r2 = vmull_high_p64(a2, b2);
|
|
const poly128_t r3 = vmull_high_p64(a3, b3);
|
|
|
|
// Linaro is missing vreinterpretq_u64_p128. Also see https://github.com/weidai11/cryptopp/issues/233.
|
|
const uint64x2_t& t1 = (uint64x2_t)(r1); // {6,0}
|
|
const uint64x2_t& t2 = (uint64x2_t)(r2); // {24,0}
|
|
const uint64x2_t& t3 = (uint64x2_t)(r3); // {bignum,bignum}
|
|
|
|
result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b &&
|
|
vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00);
|
|
}
|
|
|
|
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
|
|
signal(SIGILL, oldHandler);
|
|
return result;
|
|
# endif
|
|
#else
|
|
return false;
|
|
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
|
|
}
|
|
|
|
static bool TryCRC32()
|
|
{
|
|
#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
|
|
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
|
volatile bool result = true;
|
|
__try
|
|
{
|
|
word32 w=0, x=1; word16 y=2; byte z=3;
|
|
w = __crc32cw(w,x);
|
|
w = __crc32ch(w,y);
|
|
w = __crc32cb(w,z);
|
|
|
|
result = !!w;
|
|
}
|
|
__except (EXCEPTION_EXECUTE_HANDLER)
|
|
{
|
|
return false;
|
|
}
|
|
return result;
|
|
# else
|
|
// longjmp and clobber warnings. Volatile is required.
|
|
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
|
volatile bool result = true;
|
|
|
|
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
|
|
if (oldHandler == SIG_ERR)
|
|
return false;
|
|
|
|
volatile sigset_t oldMask;
|
|
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
|
|
return false;
|
|
|
|
if (setjmp(s_jmpNoCRC32))
|
|
result = false;
|
|
else
|
|
{
|
|
word32 w=0, x=1; word16 y=2; byte z=3;
|
|
w = __crc32cw(w,x);
|
|
w = __crc32ch(w,y);
|
|
w = __crc32cb(w,z);
|
|
|
|
// Hack... GCC optimizes away the code and returns true
|
|
result = !!w;
|
|
}
|
|
|
|
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
|
|
signal(SIGILL, oldHandler);
|
|
return result;
|
|
# endif
|
|
#else
|
|
return false;
|
|
#endif // CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE
|
|
}
|
|
|
|
static bool TryAES()
|
|
{
|
|
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
|
|
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
|
volatile bool result = true;
|
|
__try
|
|
{
|
|
// AES encrypt and decrypt
|
|
uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
|
|
uint8x16_t r1 = vaeseq_u8(data, key);
|
|
uint8x16_t r2 = vaesdq_u8(data, key);
|
|
|
|
result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
|
|
}
|
|
__except (EXCEPTION_EXECUTE_HANDLER)
|
|
{
|
|
return false;
|
|
}
|
|
return result;
|
|
# else
|
|
// longjmp and clobber warnings. Volatile is required.
|
|
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
|
volatile bool result = true;
|
|
|
|
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES);
|
|
if (oldHandler == SIG_ERR)
|
|
return false;
|
|
|
|
volatile sigset_t oldMask;
|
|
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
|
|
return false;
|
|
|
|
if (setjmp(s_jmpNoAES))
|
|
result = false;
|
|
else
|
|
{
|
|
uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
|
|
uint8x16_t r1 = vaeseq_u8(data, key);
|
|
uint8x16_t r2 = vaesdq_u8(data, key);
|
|
|
|
// Hack... GCC optimizes away the code and returns true
|
|
result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
|
|
}
|
|
|
|
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
|
|
signal(SIGILL, oldHandler);
|
|
return result;
|
|
# endif
|
|
#else
|
|
return false;
|
|
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
|
|
}
|
|
|
|
static bool TrySHA1()
|
|
{
|
|
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
|
|
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
|
volatile bool result = true;
|
|
__try
|
|
{
|
|
uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};
|
|
|
|
uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
|
|
uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
|
|
uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
|
|
uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
|
|
uint32x4_t r5 = vsha1su1q_u32 (data1, data2);
|
|
|
|
result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
|
|
}
|
|
__except (EXCEPTION_EXECUTE_HANDLER)
|
|
{
|
|
return false;
|
|
}
|
|
return result;
|
|
# else
|
|
// longjmp and clobber warnings. Volatile is required.
|
|
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
|
volatile bool result = true;
|
|
|
|
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1);
|
|
if (oldHandler == SIG_ERR)
|
|
return false;
|
|
|
|
volatile sigset_t oldMask;
|
|
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
|
|
return false;
|
|
|
|
if (setjmp(s_jmpNoSHA1))
|
|
result = false;
|
|
else
|
|
{
|
|
uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};
|
|
|
|
uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
|
|
uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
|
|
uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
|
|
uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
|
|
uint32x4_t r5 = vsha1su1q_u32 (data1, data2);
|
|
|
|
// Hack... GCC optimizes away the code and returns true
|
|
result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
|
|
}
|
|
|
|
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
|
|
signal(SIGILL, oldHandler);
|
|
return result;
|
|
# endif
|
|
#else
|
|
return false;
|
|
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
|
|
}
|
|
|
|
static bool TrySHA2()
|
|
{
|
|
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
|
|
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
|
|
volatile bool result = true;
|
|
__try
|
|
{
|
|
uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};
|
|
|
|
uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
|
|
uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
|
|
uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
|
|
uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);
|
|
|
|
result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
|
|
}
|
|
__except (EXCEPTION_EXECUTE_HANDLER)
|
|
{
|
|
return false;
|
|
}
|
|
return result;
|
|
# else
|
|
// longjmp and clobber warnings. Volatile is required.
|
|
// http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
|
|
volatile bool result = true;
|
|
|
|
volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2);
|
|
if (oldHandler == SIG_ERR)
|
|
return false;
|
|
|
|
volatile sigset_t oldMask;
|
|
if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
|
|
return false;
|
|
|
|
if (setjmp(s_jmpNoSHA2))
|
|
result = false;
|
|
else
|
|
{
|
|
uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};
|
|
|
|
uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
|
|
uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
|
|
uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
|
|
uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);
|
|
|
|
// Hack... GCC optimizes away the code and returns true
|
|
result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
|
|
}
|
|
|
|
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
|
|
signal(SIGILL, oldHandler);
|
|
return result;
|
|
# endif
|
|
#else
|
|
return false;
|
|
#endif // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
|
|
}
|
|
|
|
#if HAVE_GCC_CONSTRUCTOR1
|
|
void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectArmFeatures()
|
|
#elif HAVE_GCC_CONSTRUCTOR0
|
|
void __attribute__ ((constructor)) DetectArmFeatures()
|
|
#else
|
|
void DetectArmFeatures()
|
|
#endif
|
|
{
|
|
g_hasNEON = TryNEON();
|
|
g_hasPMULL = TryPMULL();
|
|
g_hasCRC32 = TryCRC32();
|
|
g_hasAES = TryAES();
|
|
g_hasSHA1 = TrySHA1();
|
|
g_hasSHA2 = TrySHA2();
|
|
|
|
*((volatile bool*)&g_ArmDetectionDone) = true;
|
|
}
|
|
|
|
#endif
|
|
|
|
NAMESPACE_END
|
|
|
|
#endif
|