mirror of
https://github.com/shadps4-emu/ext-cryptopp.git
synced 2024-11-23 09:59:42 +00:00
Cryptogams is Andy Polyakov's project used to create high speed crypto algorithms and share them with other developers. Cryptogams has a dual license. First is the OpenSSL license because Andy contributes to OpenSSL. Second is a BSD license for those who want a more permissive license. Andy's implementation runs about 45% faster than C/C++ code. Testing on a 1.8 GHz Cortex-A17 shows Cryptogams at 45 cpb, and C++ at 79 cpb. The integration instructions are documented at [Cryptogams SHA](https://wiki.openssl.org/index.php/Cryptogams_SHA) on the OpenSSL wiki.
This commit is contained in:
parent
4c9ca6b723
commit
d38e5a954d
@ -318,6 +318,8 @@ sha1_armv4.h
|
||||
sha1_armv4.S
|
||||
sha256_armv4.h
|
||||
sha256_armv4.S
|
||||
sha512_armv4.h
|
||||
sha512_armv4.S
|
||||
sha3.cpp
|
||||
sha3.h
|
||||
shacal2.cpp
|
||||
|
@ -1059,7 +1059,7 @@ endif
|
||||
ifeq ($(IS_ARM32),1)
|
||||
CRYPTOGAMS_ARCH_FLAG = -march=armv7-a
|
||||
CRYPTOGAMS_ARCH_FLAG += -Wa,--noexecstack
|
||||
SRCS += aes_armv4.S sha1_armv4.S sha256_armv4.S
|
||||
SRCS += aes_armv4.S sha1_armv4.S sha256_armv4.S sha512_armv4.S
|
||||
endif
|
||||
|
||||
# List cryptlib.cpp first, then cpu.cpp, then integer.cpp to tame C++ static initialization problems.
|
||||
@ -1513,6 +1513,10 @@ sha1_armv4.o : sha1_armv4.S
|
||||
sha256_armv4.o : sha256_armv4.S
|
||||
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_ARCH_FLAG) -c) $<
|
||||
|
||||
# Cryptogams ARM asm implementation.
|
||||
sha512_armv4.o : sha512_armv4.S
|
||||
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_ARCH_FLAG) -c) $<
|
||||
|
||||
sha3_simd.o : sha3_simd.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS) $(SHA3_FLAG) -c) $<
|
||||
|
||||
|
@ -594,7 +594,7 @@ ifeq ($(IS_ARM32),1)
|
||||
ifneq ($(IS_IOS),1)
|
||||
CRYPTOGAMS_ARCH_FLAG = -march=armv7-a
|
||||
CRYPTOGAMS_ARCH_FLAG += -Wa,--noexecstack
|
||||
SRCS += aes_armv4.S sha1_armv4.S sha256_armv4.S
|
||||
SRCS += aes_armv4.S sha1_armv4.S sha256_armv4.S sha512_armv4.S
|
||||
endif
|
||||
endif
|
||||
|
||||
@ -873,6 +873,10 @@ sha1_armv4.o : sha1_armv4.S
|
||||
sha256_armv4.o : sha256_armv4.S
|
||||
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_ARCH_FLAG) -c) $<
|
||||
|
||||
# Cryptogams ARM asm implementation.
|
||||
sha512_armv4.o : sha512_armv4.S
|
||||
$(CC) $(strip $(CXXFLAGS) $(CRYPTOGAMS_ARCH_FLAG) -c) $<
|
||||
|
||||
# SSE4.2/SHA-NI or ARMv8a available
|
||||
shacal2_simd.o : shacal2_simd.cpp
|
||||
$(CXX) $(strip $(CXXFLAGS) $(SHA_FLAG) -c) $<
|
||||
|
@ -366,6 +366,7 @@
|
||||
# define CRYPTOGAMS_ARM_AES 1
|
||||
# define CRYPTOGAMS_ARM_SHA1 1
|
||||
# define CRYPTOGAMS_ARM_SHA256 1
|
||||
# define CRYPTOGAMS_ARM_SHA512 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
139
sha.cpp
139
sha.cpp
@ -1,29 +1,38 @@
|
||||
// sha.cpp - modified by Wei Dai from Steve Reid's public domain sha1.c
|
||||
|
||||
// Steve Reid implemented SHA-1. Wei Dai implemented SHA-2. Jeffrey Walton
|
||||
// implemented Intel SHA extensions based on Intel articles and code by
|
||||
// Sean Gulley. Jeffrey Walton implemented ARM SHA based on ARM code and
|
||||
// code from Johannes Schneiders, Skip Hovsmith and Barry O'Rourke.
|
||||
// All code is in the public domain.
|
||||
// Steve Reid implemented SHA-1. Wei Dai implemented SHA-2. Jeffrey
|
||||
// Walton implemented Intel SHA extensions based on Intel articles and code
|
||||
// by Sean Gulley. Jeffrey Walton implemented ARM SHA based on ARM code and
|
||||
// code from Johannes Schneiders, Skip Hovsmith and Barry O'Rourke. All
|
||||
// code is in the public domain.
|
||||
|
||||
// In August 2017 JW reworked the internals to align all the implementations.
|
||||
// Formerly all hashes were software based, IterHashBase handled endian conversions,
|
||||
// and IterHashBase dispatched a single to block SHA{N}::Transform. SHA{N}::Transform
|
||||
// then performed the single block hashing. It was repeated for multiple blocks.
|
||||
// In August 2017 JW reworked the internals to align all the
|
||||
// implementations. Formerly all hashes were software based, IterHashBase
|
||||
// handled endian conversions, and IterHashBase dispatched a single block
|
||||
// to SHA{N}::Transform. SHA{N}::Transform then performed the single
|
||||
// block hashing. It was repeated for multiple blocks.
|
||||
//
|
||||
// The rework added SHA{N}::HashMultipleBlocks (class) and SHA{N}_HashMultipleBlocks
|
||||
// (free standing). There are also hardware accelerated variations. Callers enter
|
||||
// SHA{N}::HashMultipleBlocks (class), and the function calls SHA{N}_HashMultipleBlocks
|
||||
// (free standing) or SHA{N}_HashBlock (free standing) as a fallback.
|
||||
// The rework added SHA{N}::HashMultipleBlocks (class) and
|
||||
// SHA{N}_HashMultipleBlocks (free standing). There are also hardware
|
||||
// accelerated variations. Callers enter SHA{N}::HashMultipleBlocks (class)
|
||||
// and the function calls SHA{N}_HashMultipleBlocks (free standing) or
|
||||
// SHA{N}_HashBlock (free standing) as a fallback.
|
||||
//
|
||||
// An added wrinkle is hardware is little endian, C++ is big endian, and callers use
|
||||
// big endian, so SHA{N}_HashMultipleBlock accepts a ByteOrder for the incoming data
|
||||
// arrangement. Hardware based SHA{N}_HashMultipleBlock can often perform the endian
|
||||
// swap much easier by setting an EPI mask. Endian swap incurs no penalty on Intel SHA,
|
||||
// and 4-instruction penalty on ARM SHA. Under C++ the full software based swap penalty
|
||||
// is incurred due to use of ReverseBytes().
|
||||
// An added wrinkle is hardware is little endian, C++ is big endian, and
|
||||
// callers use big endian, so SHA{N}_HashMultipleBlocks accepts a ByteOrder
|
||||
// for the incoming data arrangement. Hardware based SHA{N}_HashMultipleBlocks
|
||||
// can often perform the endian swap much easier by setting an EPI mask.
|
||||
// Endian swap incurs no penalty on Intel SHA, and 4-instruction penalty on
|
||||
// ARM SHA. Under C++ the full software based swap penalty is incurred due
|
||||
// to use of ReverseBytes().
|
||||
//
|
||||
// The rework also removed the hacked-in pointers to implementations.
|
||||
// In May 2019 JW added Cryptogams ARMv7 and NEON implementations for SHA1,
|
||||
// SHA256 and SHA512. The Cryptogams code closed a performance gap on modern
|
||||
// 32-bit ARM devices. Cryptogams is Andy Polyakov's project used to create
|
||||
// high speed crypto algorithms and share them with other developers. Andy's
|
||||
// code runs 30% to 50% faster than C/C++ code. The Cryptogams code can be
|
||||
// disabled in config_asm.h. An example of integrating Andy's code is at
|
||||
// https://wiki.openssl.org/index.php/Cryptogams_SHA.
|
||||
|
||||
// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM sha.cpp" to generate MASM code
|
||||
|
||||
@ -56,7 +65,7 @@ extern void SHA1_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, siz
|
||||
extern void SHA256_HashMultipleBlocks_SHANI(word32 *state, const word32 *data, size_t length, ByteOrder order);
|
||||
#endif
|
||||
|
||||
#if (CRYPTOGAMS_ARM_SHA1)
|
||||
#if CRYPTOGAMS_ARM_SHA1
|
||||
extern "C" unsigned int CRYPTOGAMS_armcaps;
|
||||
extern "C" int sha1_block_data_order(word32* state, const word32 *data, size_t blocks);
|
||||
#endif
|
||||
@ -69,7 +78,7 @@ extern void SHA1_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, siz
|
||||
extern void SHA256_HashMultipleBlocks_ARMV8(word32 *state, const word32 *data, size_t length, ByteOrder order);
|
||||
#endif
|
||||
|
||||
#if (CRYPTOGAMS_ARM_SHA256)
|
||||
#if CRYPTOGAMS_ARM_SHA256
|
||||
extern "C" unsigned int CRYPTOGAMS_armcaps;
|
||||
extern "C" int sha256_block_data_order(word32* state, const word32 *data, size_t blocks);
|
||||
#endif
|
||||
@ -83,6 +92,11 @@ extern void SHA256_HashMultipleBlocks_POWER8(word32 *state, const word32 *data,
|
||||
extern void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t length, ByteOrder order);
|
||||
#endif
|
||||
|
||||
#if CRYPTOGAMS_ARM_SHA512
|
||||
extern "C" unsigned int CRYPTOGAMS_armcaps;
|
||||
extern "C" int sha512_block_data_order(word64* state, const word64 *data, size_t blocks);
|
||||
#endif
|
||||
|
||||
// We add extern to export table to sha_simd.cpp, but it
|
||||
// cleared http://github.com/weidai11/cryptopp/issues/502
|
||||
extern const word32 SHA256_K[64];
|
||||
@ -153,6 +167,23 @@ const word32 SHA256_K[64] = {
|
||||
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
|
||||
};
|
||||
|
||||
ANONYMOUS_NAMESPACE_BEGIN
|
||||
|
||||
#if CRYPTOGAMS_ARM_SHA1 || CRYPTOGAMS_ARM_SHA256 || CRYPTOGAMS_ARM_SHA512
|
||||
inline bool CryptogamsArmCaps()
|
||||
{
|
||||
// The Cryptogams code uses a global variable named CRYPTOGAMS_armcaps
|
||||
// for capabilities like ARMv7 and NEON. Storage is allocated in the
|
||||
// module. We still need to set CRYPTOGAMS_armcaps accordingly.
|
||||
// The Cryptogams code defines NEON as 1<<0; see ARMV7_NEON.
|
||||
*const_cast<volatile unsigned int*>(&CRYPTOGAMS_armcaps) = CryptoPP::HasNEON() ? (1<<0) : 0;
|
||||
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
ANONYMOUS_NAMESPACE_END
|
||||
|
||||
////////////////////////////////
|
||||
// start of Steve Reid's code //
|
||||
////////////////////////////////
|
||||
@ -276,11 +307,19 @@ void SHA1::Transform(word32 *state, const word32 *data)
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if CRYPTOGAMS_ARM_SHA1 && 0
|
||||
// TODO: convert LE to BE and use Cryptogams code
|
||||
#if CRYPTOGAMS_ARM_SHA1
|
||||
if (HasARMv7())
|
||||
{
|
||||
static const bool unused = CryptogamsArmCaps();
|
||||
CRYPTOPP_UNUSED(unused);
|
||||
|
||||
# if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||
word32 dataBuf[16];
|
||||
ByteReverse(dataBuf, data, SHA1::BLOCKSIZE);
|
||||
sha1_block_data_order(state, dataBuf, 1);  // hash the byte-reversed copy; passing `data` here discarded the ByteReverse result
|
||||
# else
|
||||
sha1_block_data_order(state, data, 1);
|
||||
# endif
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
@ -310,11 +349,7 @@ size_t SHA1::HashMultipleBlocks(const word32 *input, size_t length)
|
||||
#if CRYPTOGAMS_ARM_SHA1
|
||||
if (HasARMv7())
|
||||
{
|
||||
// The Cryptogams code uses a global variable named CRYPTOGAMS_armcaps
|
||||
// for capabilities like ARMv7 and NEON. Storage is allocated in the
|
||||
// module. We still need to set CRYPTOGAMS_armcaps accordingly.
|
||||
// The Cryptogams code defines NEON as 1<<0; see ARMV7_NEON.
|
||||
static const unsigned int unused = CRYPTOGAMS_armcaps = HasNEON() ? (1<<0) : 0;
|
||||
static const bool unused = CryptogamsArmCaps();
|
||||
CRYPTOPP_UNUSED(unused);
|
||||
|
||||
sha1_block_data_order(m_state, input, length / SHA1::BLOCKSIZE);
|
||||
@ -823,11 +858,19 @@ void SHA256::Transform(word32 *state, const word32 *data)
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if CRYPTOGAMS_ARM_SHA256 && 0
|
||||
// TODO: convert LE to BE and use Cryptogams code
|
||||
#if CRYPTOGAMS_ARM_SHA256
|
||||
if (HasARMv7())
|
||||
{
|
||||
static const bool unused = CryptogamsArmCaps();
|
||||
CRYPTOPP_UNUSED(unused);
|
||||
|
||||
# if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||
word32 dataBuf[16];
|
||||
ByteReverse(dataBuf, data, SHA256::BLOCKSIZE);
|
||||
sha256_block_data_order(state, dataBuf, 1);  // hash the byte-reversed copy; passing `data` here discarded the ByteReverse result
|
||||
# else
|
||||
sha256_block_data_order(state, data, 1);
|
||||
# endif
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
@ -872,11 +915,7 @@ size_t SHA256::HashMultipleBlocks(const word32 *input, size_t length)
|
||||
#if CRYPTOGAMS_ARM_SHA256
|
||||
if (HasARMv7())
|
||||
{
|
||||
// The Cryptogams code uses a global variable named CRYPTOGAMS_armcaps
|
||||
// for capabilities like ARMv7 and NEON. Storage is allocated in the
|
||||
// module. We still need to set CRYPTOGAMS_armcaps accordingly.
|
||||
// The Cryptogams code defines NEON as 1<<0; see ARMV7_NEON.
|
||||
static const unsigned int unused = CRYPTOGAMS_armcaps = HasNEON() ? (1<<0) : 0;
|
||||
static const bool unused = CryptogamsArmCaps();
|
||||
CRYPTOPP_UNUSED(unused);
|
||||
|
||||
sha256_block_data_order(m_state, input, length / SHA256::BLOCKSIZE);
|
||||
@ -942,11 +981,7 @@ size_t SHA224::HashMultipleBlocks(const word32 *input, size_t length)
|
||||
#if CRYPTOGAMS_ARM_SHA256
|
||||
if (HasARMv7())
|
||||
{
|
||||
// The Cryptogams code uses a global variable named CRYPTOGAMS_armcaps
|
||||
// for capabilities like ARMv7 and NEON. Storage is allocated in the
|
||||
// module. We still need to set CRYPTOGAMS_armcaps accordingly.
|
||||
// The Cryptogams code defines NEON as 1<<0; see ARMV7_NEON.
|
||||
static const unsigned int unused = CRYPTOGAMS_armcaps = HasNEON() ? (1<<0) : 0;
|
||||
static const bool unused = CryptogamsArmCaps();
|
||||
CRYPTOPP_UNUSED(unused);
|
||||
|
||||
sha256_block_data_order(m_state, input, length / SHA256::BLOCKSIZE);
|
||||
@ -997,6 +1032,12 @@ std::string SHA512_AlgorithmProvider()
|
||||
if (HasSSE2())
|
||||
return "SSE2";
|
||||
#endif
|
||||
#if CRYPTOGAMS_ARM_SHA512
|
||||
if (HasNEON())
|
||||
return "NEON";
|
||||
if (HasARMv7())
|
||||
return "ARMv7";
|
||||
#endif
|
||||
#if (CRYPTOPP_POWER8_SHA_AVAILABLE)
|
||||
if (HasSHA512())
|
||||
return "Power8";
|
||||
@ -1303,6 +1344,22 @@ void SHA512::Transform(word64 *state, const word64 *data)
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if CRYPTOGAMS_ARM_SHA512
|
||||
if (HasARMv7())
|
||||
{
|
||||
static const bool unused = CryptogamsArmCaps();
|
||||
CRYPTOPP_UNUSED(unused);
|
||||
|
||||
# if defined(CRYPTOPP_LITTLE_ENDIAN)
|
||||
word64 dataBuf[16];
|
||||
ByteReverse(dataBuf, data, SHA512::BLOCKSIZE);
|
||||
sha512_block_data_order(state, dataBuf, 1);
|
||||
# else
|
||||
sha512_block_data_order(state, data, 1);
|
||||
# endif
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if CRYPTOPP_POWER8_SHA_AVAILABLE
|
||||
if (HasSHA512())
|
||||
{
|
||||
|
1881
sha512_armv4.S
Normal file
1881
sha512_armv4.S
Normal file
File diff suppressed because it is too large
Load Diff
21
sha512_armv4.h
Normal file
21
sha512_armv4.h
Normal file
@ -0,0 +1,21 @@
|
||||
/* Header file for use with Cryptogams' ARMv4 SHA512. */
|
||||
/* Also see http://www.openssl.org/~appro/cryptogams/ */
|
||||
/* https://wiki.openssl.org/index.php/Cryptogams_SHA. */
|
||||
|
||||
#ifndef CRYPTOGAMS_SHA512_ARMV4_H
|
||||
#define CRYPTOGAMS_SHA512_ARMV4_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void sha512_block_data_order(void *state, const void *data, size_t blocks);
|
||||
|
||||
/* Cryptogams arm caps */
|
||||
#define ARMV7_NEON (1<<0)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* CRYPTOGAMS_SHA512_ARMV4_H */
|
Loading…
Reference in New Issue
Block a user