Updated etection code. Added ARMv8-a CRC32 implementations

This commit is contained in:
Jeffrey Walton 2016-05-06 04:35:42 -04:00
parent ed10758af2
commit fe4019b942
4 changed files with 84 additions and 24 deletions

29
cpu.cpp
View File

@ -22,14 +22,6 @@
#include <emmintrin.h>
#endif
#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
# if defined(__linux__)
# include <sys/auxv.h>
# include <asm/hwcap.h>
# endif
# include <arm_neon.h>
#endif
NAMESPACE_BEGIN(CryptoPP)
// MacPorts/GCC does not provide constructor(priority). Apple/GCC and Fink/GCC do provide it.
@ -264,9 +256,9 @@ void DetectX86Features()
#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64)
bool g_ArmDetectionDone = false;
bool g_hasNEON = false;
bool g_hasNEON = false, g_hasCRC32 = false;
// This is avaiable in a status register, but we need privileged code to execute the read
// This is avaiable in a status register, but we need privileged code to perform the read
word32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
#if HAVE_GCC_CONSTRUCTOR1
@ -277,17 +269,18 @@ void __attribute__ ((constructor)) DetectArmFeatures()
void DetectArmFeatures()
#endif
{
g_hasNEON = false;
#if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
# if defined(__linux__) && defined(__aarch64__)
const long hwcaps = getauxval(AT_HWCAP);
#if defined(__linux__) && defined(__aarch64__)
const unsigned long hwcaps = getauxval(AT_HWCAP);
g_hasNEON = !!(hwcaps & HWCAP_ASIMD);
# elif defined(__linux__)
const long hwcaps = getauxval(AT_HWCAP);
g_hasCRC32 = !!(hwcaps & HWCAP_CRC32);
#elif defined(__linux__)
const unsigned long hwcaps = getauxval(AT_HWCAP);
g_hasNEON = !!(hwcaps & HWCAP_ARM_NEON);
# elif defined(_WIN32) && defined(_M_ARM)
// g_hasCRC32 = !!(hwcaps & HWCAP_ARM_CRC32);
g_hasCRC32 = false;
#elif defined(_WIN32) && defined(_M_ARM)
g_hasNEON = true;
# endif
g_hasCRC32 = false;
#endif
*((volatile bool*)&g_ArmDetectionDone) = true;
}

44
cpu.h
View File

@ -1,13 +1,30 @@
// cpu.h - written and placed in the public domain by Wei Dai
//! \file cpu.h
//! \brief Classes, functions, intrinsics and features for X86, X32 nd X64 assembly
//! \brief Functions for CPU features and intrinsics
//! \details At the moment, the functions are used heavily in X86/X32/X64 code paths
// for SSE, SSE2 and SSE4. The funtions are also used on occassion for AArch32
//! and AArch64 code paths for NEON.
#ifndef CRYPTOPP_CPU_H
#define CRYPTOPP_CPU_H
#include "config.h"
#if defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)
# if defined(__linux__)
# include <sys/auxv.h>
# include <asm/hwcap.h>
# include <stdint.h>
# endif
# if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
# include <arm_neon.h>
# endif
# if (__ARM_ACLE >= 200)
# include <arm_acle.h>
# endif
#endif // ARM-32 or ARM-64
#ifdef CRYPTOPP_GENERATE_X64_MASM
#define CRYPTOPP_X86_ASM_AVAILABLE
@ -217,12 +234,14 @@ inline int GetCacheLineSize()
return g_cacheLineSize;
}
#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64)
#elif defined(CRYPTOPP_BOOL_ARM32) || defined(CRYPTOPP_BOOL_ARM64)
extern bool g_ArmDetectionDone;
extern bool g_hasNEON;
extern bool g_hasNEON, g_hasCRC32;
void CRYPTOPP_API DetectArmFeatures();
//! \brief Determine if an ARM processor has Advanced SIMD available
//! \returns true if the hardware is capable of Advanced SIMD at runtime, false otherwise.
inline bool HasNEON()
{
if (!g_ArmDetectionDone)
@ -230,6 +249,19 @@ inline bool HasNEON()
return g_hasNEON;
}
//! \brief Determine if an ARM processor has CRC32 available
//! \returns true if the hardware is capable of CRC32 at runtime, false otherwise.
inline bool HasCRC32()
{
if (!g_ArmDetectionDone)
DetectArmFeatures();
return g_hasCRC32;
}
//! \brief Provides the cache line size at runtime
//! \returns true if the hardware is capable of CRC32 at runtime, false otherwise.
//! \details GetCacheLineSize() provides is an estimate using CRYPTOPP_L1_CACHE_LINE_SIZE.
//! The runtime instructions to query the processor are privileged.
inline int GetCacheLineSize()
{
return CRYPTOPP_L1_CACHE_LINE_SIZE;
@ -246,6 +278,8 @@ inline int GetCacheLineSize()
#endif
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
#ifdef CRYPTOPP_GENERATE_X64_MASM
#define AS1(x) x*newline*
#define AS2(x, y) x, y*newline*
@ -448,6 +482,8 @@ inline int GetCacheLineSize()
ASL(labelPrefix##9)\
AS2( add outputPtr, increment*16)
#endif // X86/X32/X64
NAMESPACE_END
#endif
#endif // CRYPTOPP_CPU_H

30
crc.cpp
View File

@ -131,6 +131,22 @@ CRC32::CRC32()
void CRC32::Update(const byte *s, size_t n)
{
#if defined(__ARM_FEATURE_CRC32)
if (HasCRC32())
{
for(; !IsAligned<word32>(s) && n > 0; s++, n--)
m_crc = __crc32b(m_crc, *s);
for(; n > 4; s+=4, n-=4)
m_crc = __crc32w(m_crc, *(const word32 *)(void*)s);
for(; n > 0; s++, n--)
m_crc = __crc32b(m_crc, *s);
return;
}
#endif
word32 crc = m_crc;
for(; !IsAligned<word32>(s) && n > 0; n--)
@ -295,6 +311,20 @@ void CRC32C::Update(const byte *s, size_t n)
for(; n > 0; s++, n--)
m_crc = _mm_crc32_u8(m_crc, *s);
return;
}
#elif defined(__ARM_FEATURE_CRC32)
if (HasCRC32())
{
for(; !IsAligned<word32>(s) && n > 0; s++, n--)
m_crc = __crc32cb(m_crc, *s);
for(; n > 4; s+=4, n-=4)
m_crc = __crc32cw(m_crc, *(const word32 *)(void*)s);
for(; n > 0; s++, n--)
m_crc = __crc32cb(m_crc, *s);
return;
}
#endif

View File

@ -314,9 +314,10 @@ bool TestSettings()
#elif defined(CRYPTOPP_BOOL_ARM32) || defined (CRYPTOPP_BOOL_ARM64)
bool hasNEON = HasNEON();
bool hasCRC32 = HasCRC32();
cout << "passed: ";
cout << "hasNEON == " << hasNEON << endl;
cout << "hasNEON == " << hasNEON << ", hasCRC32 == " << hasCRC32 << endl;
#endif
if (!pass)
@ -912,7 +913,7 @@ bool TestOS_RNG()
return pass;
}
#ifdef NO_OS_DEPENDENCE
#if defined(NO_OS_DEPENDENCE) || !defined(OS_RNG_AVAILABLE)
bool TestAutoSeeded()
{
return true;