mirror of
https://github.com/shadps4-emu/ext-cryptopp.git
synced 2024-11-23 01:49:41 +00:00
Add Power8 AES decryption
This commit is contained in:
parent
cfb63decec
commit
b090e5f69f
@ -154,10 +154,11 @@ const byte Rijndael::Base::Sd[256] = {
|
||||
0x55, 0x21, 0x0c, 0x7d,
|
||||
};
|
||||
|
||||
/* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
|
||||
const word32 Rijndael::Base::rcon[] = {
|
||||
0x01000000, 0x02000000, 0x04000000, 0x08000000,
|
||||
0x10000000, 0x20000000, 0x40000000, 0x80000000,
|
||||
0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
|
||||
0x1B000000, 0x36000000
|
||||
};
|
||||
|
||||
NAMESPACE_END
|
||||
|
@ -45,8 +45,9 @@
|
||||
|
||||
// Don't include <arm_acle.h> when using Apple Clang. Early Apple compilers
|
||||
// fail to compile with <arm_acle.h> included. Later Apple compilers compile
|
||||
// intrinsics without <arm_acle.h> included.
|
||||
#if (CRYPTOPP_ARM_AES_AVAILABLE) && !defined(CRYPTOPP_APPLE_CLANG_VERSION)
|
||||
// intrinsics without <arm_acle.h> included. Also avoid it with GCC 4.8.
|
||||
#if (CRYPTOPP_ARM_AES_AVAILABLE) && !defined(CRYPTOPP_APPLE_CLANG_VERSION) && \
|
||||
(!defined(CRYPTOPP_GCC_VERSION) || (CRYPTOPP_GCC_VERSION >= 40900))
|
||||
# include <arm_acle.h>
|
||||
#endif
|
||||
|
||||
@ -158,6 +159,24 @@ bool CPU_ProbeAES()
|
||||
}
|
||||
#endif // ARM32 or ARM64
|
||||
|
||||
ANONYMOUS_NAMESPACE_BEGIN
|
||||
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
const word32 s_one[] = {0, 0, 0, 1<<24};
|
||||
|
||||
/* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
const word32 s_rconLE[] = {
|
||||
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
|
||||
};
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
const word32 s_rconBE[] = {
|
||||
0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000,
|
||||
0x20000000, 0x40000000, 0x80000000, 0x1B000000, 0x36000000
|
||||
};
|
||||
|
||||
ANONYMOUS_NAMESPACE_END
|
||||
|
||||
// ***************************** ARMv8 ***************************** //
|
||||
|
||||
#if (CRYPTOPP_ARM_AES_AVAILABLE)
|
||||
@ -323,15 +342,6 @@ inline void ARMV8_Dec_4_Blocks(uint8x16_t &block0, uint8x16_t &block1, uint8x16_
|
||||
block3 = veorq_u8(block3, vld1q_u8(keys+(i+1)*16));
|
||||
}
|
||||
|
||||
const word32 s_one[] = {0, 0, 0, 1<<24};
|
||||
|
||||
/* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
|
||||
const word32 rcon[] = {
|
||||
0x01, 0x02, 0x04, 0x08,
|
||||
0x10, 0x20, 0x40, 0x80,
|
||||
0x1B, 0x36
|
||||
};
|
||||
|
||||
template <typename F1, typename F4>
|
||||
size_t Rijndael_AdvancedProcessBlocks_ARMV8(F1 func1, F4 func4, const word32 *subKeys, size_t rounds,
|
||||
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
|
||||
@ -537,9 +547,6 @@ inline void AESNI_Dec_4_Blocks(__m128i &block0, __m128i &block1, __m128i &block2
|
||||
block3 = _mm_aesdeclast_si128(block3, rk);
|
||||
}
|
||||
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
static const word32 s_one[] = {0, 0, 0, 1<<24};
|
||||
|
||||
template <typename F1, typename F4>
|
||||
inline size_t Rijndael_AdvancedProcessBlocks_AESNI(F1 func1, F4 func4,
|
||||
MAYBE_CONST word32 *subKeys, size_t rounds, const byte *inBlocks,
|
||||
@ -680,16 +687,9 @@ size_t Rijndael_Dec_AdvancedProcessBlocks_AESNI(const word32 *subKeys, size_t ro
|
||||
sk, rounds, ib, xb, outBlocks, length, flags);
|
||||
}
|
||||
|
||||
void Rijndael_UncheckedSetKey_SSE4_AESNI(const byte *userKey, size_t keyLen, word32 *rk)
|
||||
void Rijndael_UncheckedSetKey_SSE4_AESNI(const byte *userKey, size_t keyLen, word32 *rk, unsigned int rounds)
|
||||
{
|
||||
const unsigned rounds = static_cast<unsigned int>(keyLen/4 + 6);
|
||||
static const word32 rcLE[] = {
|
||||
0x01, 0x02, 0x04, 0x08,
|
||||
0x10, 0x20, 0x40, 0x80,
|
||||
0x1B, 0x36, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
|
||||
};
|
||||
|
||||
const word32 *ro = rcLE, *rc = rcLE;
|
||||
const word32 *ro = s_rconLE, *rc = s_rconLE;
|
||||
CRYPTOPP_UNUSED(ro);
|
||||
|
||||
__m128i temp = _mm_loadu_si128(M128_CAST(userKey+keyLen-16));
|
||||
@ -700,7 +700,7 @@ void Rijndael_UncheckedSetKey_SSE4_AESNI(const byte *userKey, size_t keyLen, wor
|
||||
const word32* end = rk + keySize;
|
||||
while (true)
|
||||
{
|
||||
CRYPTOPP_ASSERT(rc < ro + COUNTOF(rcLE));
|
||||
CRYPTOPP_ASSERT(rc < ro + COUNTOF(s_rconLE));
|
||||
rk[keyLen/4] = rk[0] ^ _mm_extract_epi32(_mm_aeskeygenassist_si128(temp, 0), 3) ^ *(rc++);
|
||||
rk[keyLen/4+1] = rk[1] ^ rk[keyLen/4];
|
||||
rk[keyLen/4+2] = rk[2] ^ rk[keyLen/4+1];
|
||||
@ -1011,17 +1011,21 @@ void Rijndael_Dec_ProcessAndXorBlock_POWER8(const word32 *subkeys, size_t rounds
|
||||
const byte *keys = reinterpret_cast<const byte*>(subkeys);
|
||||
|
||||
VectorType s = VectorLoad(inBlock);
|
||||
VectorType k = VectorLoadAligned(keys);
|
||||
VectorType k = VectorLoadAligned(rounds*16, keys);
|
||||
|
||||
s = VectorXor(s, k);
|
||||
for (size_t i=1; i<rounds-1; i+=2)
|
||||
for (size_t i=rounds-1; i>1; i-=2)
|
||||
{
|
||||
s = VectorDecrypt(s, VectorLoadAligned( i*16, keys));
|
||||
s = VectorDecrypt(s, VectorLoadAligned((i+1)*16, keys));
|
||||
s = VectorDecrypt(s, VectorLoadAligned((i-1)*16, keys));
|
||||
}
|
||||
|
||||
s = VectorDecrypt(s, VectorLoadAligned((rounds-1)*16, keys));
|
||||
s = VectorDecryptLast(s, VectorLoadAligned(rounds*16, keys));
|
||||
s = VectorDecrypt(s, VectorLoadAligned(16, keys));
|
||||
s = VectorDecryptLast(s, VectorLoadAligned(0, keys));
|
||||
|
||||
// According to benchmarks this is a tad bit slower
|
||||
// if (xorBlock)
|
||||
// s = VectorXor(s, VectorLoad(xorBlock));
|
||||
|
||||
VectorType x = xorBlock ? VectorLoad(xorBlock) : (VectorType) {0};
|
||||
s = VectorXor(s, x);
|
||||
|
54
rijndael.cpp
54
rijndael.cpp
@ -113,6 +113,19 @@ CRYPTOPP_ALIGN_DATA(16) static word32 Td[256*4];
|
||||
|
||||
static volatile bool s_TeFilled = false, s_TdFilled = false;
|
||||
|
||||
ANONYMOUS_NAMESPACE_BEGIN
|
||||
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
const word32 s_one[] = {0, 0, 0, 1<<24};
|
||||
|
||||
/* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
|
||||
CRYPTOPP_ALIGN_DATA(16)
|
||||
const word32 s_rconLE[] = {
|
||||
0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
|
||||
};
|
||||
|
||||
ANONYMOUS_NAMESPACE_END
|
||||
|
||||
// ************************* Portable Code ************************************
|
||||
|
||||
#define QUARTER_ROUND(L, T, t, a, b, c, d) \
|
||||
@ -221,7 +234,7 @@ void Rijndael::Base::FillDecTable()
|
||||
}
|
||||
|
||||
#if (CRYPTOPP_AESNI_AVAILABLE)
|
||||
extern void Rijndael_UncheckedSetKey_SSE4_AESNI(const byte *userKey, size_t keyLen, word32* rk);
|
||||
extern void Rijndael_UncheckedSetKey_SSE4_AESNI(const byte *userKey, size_t keyLen, word32* rk, unsigned int rounds);
|
||||
extern void Rijndael_UncheckedSetKeyRev_AESNI(word32 *key, unsigned int rounds);
|
||||
|
||||
extern size_t Rijndael_Enc_AdvancedProcessBlocks_AESNI(const word32 *subkeys, size_t rounds,
|
||||
@ -240,8 +253,6 @@ extern size_t Rijndael_Dec_AdvancedProcessBlocks_ARMV8(const word32 *subkeys, si
|
||||
#if (CRYPTOPP_POWER8_AES_AVAILABLE)
|
||||
extern void ByteReverseArrayLE(byte src[16]);
|
||||
|
||||
extern void Rijndael_UncheckedSetKey_POWER8(const byte *userKey, size_t keyLen, word32 *rk, CipherDir dir);
|
||||
|
||||
extern void Rijndael_Enc_ProcessAndXorBlock_POWER8(const word32 *subkeys, size_t rounds,
|
||||
const byte *inBlock, const byte *xorBlock, byte *outBlock);
|
||||
extern void Rijndael_Dec_ProcessAndXorBlock_POWER8(const word32 *subkeys, size_t rounds,
|
||||
@ -263,7 +274,7 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLen, c
|
||||
{
|
||||
// TODO: Add non-SSE4.1 variant for low-end Atoms. The low-end
|
||||
// Atoms have SSE2-SSSE3 and AES-NI, but not SSE4.1 or SSE4.2.
|
||||
Rijndael_UncheckedSetKey_SSE4_AESNI(userKey, keyLen, rk);
|
||||
Rijndael_UncheckedSetKey_SSE4_AESNI(userKey, keyLen, rk, m_rounds);
|
||||
if (!IsForwardTransformation())
|
||||
Rijndael_UncheckedSetKeyRev_AESNI(m_key, m_rounds);
|
||||
|
||||
@ -306,6 +317,25 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLen, c
|
||||
|
||||
rk = m_key;
|
||||
|
||||
#if CRYPTOPP_POWER8_AES_AVAILABLE
|
||||
if (HasAES())
|
||||
{
|
||||
ConditionalByteReverse(BIG_ENDIAN_ORDER, rk, rk, 16);
|
||||
ConditionalByteReverse(BIG_ENDIAN_ORDER, rk + m_rounds*4, rk + m_rounds*4, 16);
|
||||
ConditionalByteReverse(BIG_ENDIAN_ORDER, rk+4, rk+4, (m_rounds-1)*16);
|
||||
|
||||
#if defined(IS_LITTLE_ENDIAN)
|
||||
// VSX registers are big-endian. The entire subkey table must be byte
|
||||
// reversed on little-endian systems to ensure it loads properly.
|
||||
byte * ptr = reinterpret_cast<byte*>(rk);
|
||||
for (unsigned int i=0; i<=m_rounds; i++)
|
||||
ByteReverseArrayLE(ptr+i*16);
|
||||
#endif // IS_LITTLE_ENDIAN
|
||||
|
||||
return;
|
||||
}
|
||||
#endif // CRYPTOPP_POWER8_AES_AVAILABLE
|
||||
|
||||
if (IsForwardTransformation())
|
||||
{
|
||||
if (!s_TeFilled)
|
||||
@ -351,20 +381,6 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLen, c
|
||||
if (HasAES())
|
||||
ConditionalByteReverse(BIG_ENDIAN_ORDER, rk+4, rk+4, (m_rounds-1)*16);
|
||||
#endif
|
||||
#if CRYPTOPP_POWER8_AES_AVAILABLE
|
||||
if (IsForwardTransformation() && HasAES())
|
||||
{
|
||||
ConditionalByteReverse(BIG_ENDIAN_ORDER, rk+4, rk+4, (m_rounds-1)*16);
|
||||
|
||||
#if defined(IS_LITTLE_ENDIAN)
|
||||
// VSX registers are big-endian. The entire subkey table must be byte
|
||||
// reversed on little-endian systems to ensure it loads properly.
|
||||
byte * ptr = reinterpret_cast<byte*>(rk);
|
||||
for (unsigned int i=0; i<=m_rounds; i++)
|
||||
ByteReverseArrayLE(ptr+i*16);
|
||||
#endif // IS_LITTLE_ENDIAN
|
||||
}
|
||||
#endif // CRYPTOPP_POWER8_AES_AVAILABLE
|
||||
}
|
||||
|
||||
void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
|
||||
@ -483,7 +499,7 @@ void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (CRYPTOPP_POWER8_AES_AVAILABLE) && 0
|
||||
#if (CRYPTOPP_POWER8_AES_AVAILABLE)
|
||||
if (HasAES())
|
||||
{
|
||||
(void)Rijndael_Dec_ProcessAndXorBlock_POWER8(m_key, m_rounds, inBlock, xorBlock, outBlock);
|
||||
|
Loading…
Reference in New Issue
Block a user