Use inline ASM for CRC on Apple ARMv8

This commit is contained in:
Jeffrey Walton 2021-05-25 20:24:58 -04:00
parent c3d1eedff0
commit 1221e0dc9e
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
5 changed files with 184 additions and 42 deletions

View File

@ -455,11 +455,11 @@ ifneq ($(IS_ARMV8),0)
ifeq ($(DETECT_FEATURES),1)
ifeq ($(IS_IOS),1)
ASIMD_FLAG =
CRC_FLAG =
AES_FLAG =
PMUL_FLAG =
SHA_FLAG =
ASIMD_FLAG = -arch arm64
CRC_FLAG = -arch arm64
AES_FLAG = -arch arm64
PMUL_FLAG = -arch arm64
SHA_FLAG = -arch arm64
else
ASIMD_FLAG = -march=armv8-a
CRC_FLAG = -march=armv8-a+crc

View File

@ -2,20 +2,68 @@
#ifdef CRYPTOPP_ARM_NEON_HEADER
# include <arm_neon.h>
#endif
#ifdef CRYPTOPP_ARM_ACLE_HEADER
#if (CRYPTOPP_ARM_ACLE_HEADER)
# include <stdint.h>
# include <arm_acle.h>
#endif
// Keep sync'd with arm_simd.h
inline uint32_t CRC32B (uint32_t crc, uint8_t val)
{
#if defined(_MSC_VER)
return __crc32b(crc, val);
#else
uint32_t r;
__asm__ ("crc32b %w0, %w1, %w2 \n\t"
:"=r" (r) : "r" (crc), "r" (val) );
return r;
#endif
}
inline uint32_t CRC32W (uint32_t crc, uint32_t val)
{
#if defined(_MSC_VER)
return __crc32w(crc, val);
#else
uint32_t r;
__asm__ ("crc32w %w0, %w1, %w2 \n\t"
:"=r" (r) : "r" (crc), "r" (val) );
return r;
#endif
}
inline uint32_t CRC32CB (uint32_t crc, uint8_t val)
{
#if defined(_MSC_VER)
return __crc32cb(crc, val);
#else
uint32_t r;
__asm__ ("crc32cb %w0, %w1, %w2 \n\t"
:"=r" (r) : "r" (crc), "r" (val) );
return r;
#endif
}
inline uint32_t CRC32CW (uint32_t crc, uint32_t val)
{
#if defined(_MSC_VER)
return __crc32cw(crc, val);
#else
uint32_t r;
__asm__ ("crc32cw %w0, %w1, %w2 \n\t"
:"=r" (r) : "r" (crc), "r" (val) );
return r;
#endif
}
int main(int argc, char* argv[])
{
uint32_t w=0xffffffff;
w = __crc32w(w,w);
w = __crc32h(w,w);
w = __crc32b(w,w);
w = __crc32cw(w,w);
w = __crc32ch(w,w);
w = __crc32cb(w,w);
w = CRC32B(w,w);
w = CRC32W(w,w);
w = CRC32CB(w,w);
w = CRC32CW(w,w);
return 0;
}

View File

@ -13,10 +13,80 @@
# include <arm_neon.h>
#endif
//#if (CRYPTOPP_ARM_ACLE_HEADER)
//# include <stdint.h>
//# include <arm_acle.h>
//#endif
#if (CRYPTOPP_ARM_ACLE_HEADER)
# include <stdint.h>
# include <arm_acle.h>
#endif
#if (CRYPTOPP_ARM_CRC32_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
/// \brief CRC32
/// \param a the first value
/// \param b the second value
/// \return CRC32 value
/// \since Crypto++ 8.6
inline uint32_t CRC32B (uint32_t crc, uint8_t val)
{
#if defined(_MSC_VER)
return __crc32b(crc, val);
#else
uint32_t r;
__asm__ ("crc32b %w0, %w1, %w2 \n\t"
:"=r" (r) : "r" (crc), "r" (val) );
return r;
#endif
}
/// \brief CRC32
/// \param a the first value
/// \param b the second value
/// \return CRC32 value
/// \since Crypto++ 8.6
inline uint32_t CRC32W (uint32_t crc, uint32_t val)
{
#if defined(_MSC_VER)
return __crc32w(crc, val);
#else
uint32_t r;
__asm__ ("crc32w %w0, %w1, %w2 \n\t"
:"=r" (r) : "r" (crc), "r" (val) );
return r;
#endif
}
/// \brief CRC32-C
/// \param a the first value
/// \param b the second value
/// \return CRC32 value
/// \since Crypto++ 8.6
inline uint32_t CRC32CB (uint32_t crc, uint8_t val)
{
#if defined(_MSC_VER)
return __crc32cb(crc, val);
#else
uint32_t r;
__asm__ ("crc32cb %w0, %w1, %w2 \n\t"
:"=r" (r) : "r" (crc), "r" (val) );
return r;
#endif
}
/// \brief CRC32-C
/// \param a the first value
/// \param b the second value
/// \return CRC32 value
/// \since Crypto++ 8.6
inline uint32_t CRC32CW (uint32_t crc, uint32_t val)
{
#if defined(_MSC_VER)
return __crc32cw(crc, val);
#else
uint32_t r;
__asm__ ("crc32cw %w0, %w1, %w2 \n\t"
:"=r" (r) : "r" (crc), "r" (val) );
return r;
#endif
}
#endif // CRYPTOPP_ARM_CRC32_AVAILABLE
#if (CRYPTOPP_ARM_PMULL_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)

View File

@ -264,11 +264,11 @@
// ARMv8 and ASIMD. -march=armv8-a+crc or above must be present
// Requires GCC 4.8, Clang 3.3 or Visual Studio 2017
// Do not use APPLE_CLANG_VERSION; use __ARM_FEATURE_XXX instead.
#if !defined(CRYPTOPP_ARM_CRC32_AVAILABLE) && !defined(CRYPTOPP_DISABLE_ARM_CRC32)
# if defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64)
# if defined(__ARM_FEATURE_CRC32) || (CRYPTOPP_GCC_VERSION >= 40800) || \
(CRYPTOPP_LLVM_CLANG_VERSION >= 30300) || (CRYPTOPP_MSC_VERSION >= 1916)
(CRYPTOPP_LLVM_CLANG_VERSION >= 30300) || (CRYPTOPP_APPLE_CLANG_VERSION >= 40000) || \
(CRYPTOPP_MSC_VERSION >= 1916)
# define CRYPTOPP_ARM_CRC32_AVAILABLE 1
# endif // Compilers
# endif // Platforms

View File

@ -19,6 +19,10 @@
# include <arm_acle.h>
#endif
#if (CRYPTOPP_ARM_CRC32_AVAILABLE)
# include "arm_simd.h"
#endif
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
# include <signal.h>
# include <setjmp.h>
@ -50,19 +54,17 @@ extern "C" {
bool CPU_ProbeCRC32()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
return false;
return false;
#elif (CRYPTOPP_ARM_CRC32_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
volatile bool result = true;
__try
{
word32 w=0, x=1; word16 y=2; byte z=3;
w = __crc32w(w,x);
w = __crc32h(w,y);
w = __crc32b(w,z);
w = __crc32cw(w,x);
w = __crc32ch(w,y);
w = __crc32cb(w,z);
word32 w=0, x=1; byte z=3;
w = CRC32W(w,x);
w = CRC32B(w,z);
w = CRC32CW(w,x);
w = CRC32CB(w,z);
result = !!w;
}
@ -92,13 +94,11 @@ bool CPU_ProbeCRC32()
result = false;
else
{
word32 w=0, x=1; word16 y=2; byte z=3;
w = __crc32w(w,x);
w = __crc32h(w,y);
w = __crc32b(w,z);
w = __crc32cw(w,x);
w = __crc32ch(w,y);
w = __crc32cb(w,z);
word32 w=0, x=1; byte z=3;
w = CRC32W(w,x);
w = CRC32B(w,z);
w = CRC32CW(w,x);
w = CRC32CB(w,z);
// Hack... GCC optimizes away the code and returns true
result = !!w;
@ -118,25 +118,41 @@ bool CPU_ProbeCRC32()
void CRC32_Update_ARMV8(const byte *s, size_t n, word32& c)
{
for(; !IsAligned<word32>(s) && n > 0; s++, n--)
c = __crc32b(c, *s);
c = CRC32B(c, *s);
for(; n > 4; s+=4, n-=4)
c = __crc32w(c, *(const word32 *)(void*)s);
for(; n >= 16; s+=16, n-=16)
{
c = CRC32W(c, *(const word32 *)(void*)(s+ 0));
c = CRC32W(c, *(const word32 *)(void*)(s+ 4));
c = CRC32W(c, *(const word32 *)(void*)(s+ 8));
c = CRC32W(c, *(const word32 *)(void*)(s+12));
}
for(; n >= 4; s+=4, n-=4)
c = CRC32W(c, *(const word32 *)(void*)s);
for(; n > 0; s++, n--)
c = __crc32b(c, *s);
c = CRC32B(c, *s);
}
void CRC32C_Update_ARMV8(const byte *s, size_t n, word32& c)
{
for(; !IsAligned<word32>(s) && n > 0; s++, n--)
c = __crc32cb(c, *s);
c = CRC32CB(c, *s);
for(; n > 4; s+=4, n-=4)
c = __crc32cw(c, *(const word32 *)(void*)s);
for(; n >= 16; s+=16, n-=16)
{
c = CRC32CW(c, *(const word32 *)(void*)(s+ 0));
c = CRC32CW(c, *(const word32 *)(void*)(s+ 4));
c = CRC32CW(c, *(const word32 *)(void*)(s+ 8));
c = CRC32CW(c, *(const word32 *)(void*)(s+12));
}
for(; n >= 4; s+=4, n-=4)
c = CRC32CW(c, *(const word32 *)(void*)s);
for(; n > 0; s++, n--)
c = __crc32cb(c, *s);
c = CRC32CB(c, *s);
}
#endif
@ -146,7 +162,15 @@ void CRC32C_Update_SSE42(const byte *s, size_t n, word32& c)
for(; !IsAligned<word32>(s) && n > 0; s++, n--)
c = _mm_crc32_u8(c, *s);
for(; n > 4; s+=4, n-=4)
for(; n >= 16; s+=16, n-=16)
{
c = _mm_crc32_u32(c, *(const word32 *)(void*)(s+ 0));
c = _mm_crc32_u32(c, *(const word32 *)(void*)(s+ 4));
c = _mm_crc32_u32(c, *(const word32 *)(void*)(s+ 8));
c = _mm_crc32_u32(c, *(const word32 *)(void*)(s+12));
}
for(; n >= 4; s+=4, n-=4)
c = _mm_crc32_u32(c, *(const word32 *)(void*)s);
for(; n > 0; s++, n--)