mirror of
https://github.com/shadps4-emu/ext-cryptopp.git
synced 2024-11-23 09:59:42 +00:00
304809a65d
Performance increased by about 115% on a 980 MHz BananaPi dev-board. Throughput went from about 46.2 cpb to about 21.5 cpb.
390 lines
12 KiB
C++
390 lines
12 KiB
C++
// speck.cpp - written and placed in the public domain by Jeffrey Walton
|
|
|
|
#include "pch.h"
|
|
#include "config.h"
|
|
|
|
#include "speck.h"
|
|
#include "misc.h"
|
|
#include "cpu.h"
|
|
|
|
// Uncomment for benchmarking C++ against SSSE3 or NEON.
|
|
// Do so in both speck.cpp and speck-simd.cpp.
|
|
// #undef CRYPTOPP_SSSE3_AVAILABLE
|
|
// #undef CRYPTOPP_ARM_NEON_AVAILABLE
|
|
|
|
// Disable NEON/ASIMD for Cortex-A53 and A57. The shifts are too slow and C/C++ is about
|
|
// 3 cpb faster than NEON/ASIMD. Also see http://github.com/weidai11/cryptopp/issues/367.
|
|
#if (defined(__aarch32__) || defined(__aarch64__)) && defined(CRYPTOPP_SLOW_ARMV8_SHIFT)
|
|
# undef CRYPTOPP_ARM_NEON_AVAILABLE
|
|
#endif
|
|
|
|
ANONYMOUS_NAMESPACE_BEGIN
|
|
|
|
using CryptoPP::word32;
|
|
using CryptoPP::word64;
|
|
using CryptoPP::rotlFixed;
|
|
using CryptoPP::rotrFixed;
|
|
|
|
//! \brief Forward round transformation
//! \tparam W word type
//! \param x first state word, updated in place
//! \param y second state word, updated in place
//! \param k value XORed into x (a subkey, or the round counter during key expansion)
//! \details TF83() performs one forward round with the rotation amounts a=8 and b=3.
//!   x is rotated right by 8, y is added to it and k is XORed in; y is then rotated
//!   left by 3 and XORed with the new x. The rotation amounts are hard-coded rather
//!   than template parameters because SPECK32 (a=7, b=2) was not on the road map,
//!   and the extra parameters made calls to SPECK_Encrypt and SPECK_Decrypt messy.
template <class W>
inline void TF83(W& x, W& y, const W k)
{
    x = static_cast<W>((rotrFixed(x, 8) + y) ^ k);
    y = static_cast<W>(rotlFixed(y, 3) ^ x);
}
|
|
|
|
//! \brief Reverse round transformation
//! \tparam W word type
//! \param x first state word, updated in place
//! \param y second state word, updated in place
//! \param k value XORed out of x (the subkey used by the matching forward round)
//! \details TR83() undoes one TF83() round with the rotation amounts a=8 and b=3.
//!   y is XORed with x and rotated right by 3; x then has k XORed out, the new y
//!   subtracted, and is rotated left by 8. The rotation amounts are hard-coded
//!   rather than template parameters because SPECK32 (a=7, b=2) was not on the
//!   road map, and the extra parameters made calls to SPECK_Encrypt and
//!   SPECK_Decrypt messy.
template <class W>
inline void TR83(W& x, W& y, const W k)
{
    y = rotrFixed(static_cast<W>(y ^ x), 3);
    x = rotlFixed(static_cast<W>((x ^ k) - y), 8);
}
|
|
|
|
//! \brief Forward transformation
//! \tparam W word type
//! \tparam R number of rounds
//! \param c output array
//! \param p input array
//! \param k subkey array
//! \details Copies the two input words to the output array and then applies
//!   TF83() to them R times, using subkeys k[0] through k[R-1] in order.
template <class W, unsigned int R>
inline void SPECK_Encrypt(W c[2], const W p[2], const W k[R])
{
    c[0]=p[0]; c[1]=p[1];

    // Don't unroll this loop. Things slow down.
    // The index is a plain int compared against R converted once to int, so the
    // comparison does not mix signed and unsigned operands.
    for (int i = 0; i < static_cast<int>(R); ++i)
        TF83(c[0], c[1], k[i]);
}
|
|
|
|
//! \brief Reverse transformation
//! \tparam W word type
//! \tparam R number of rounds
//! \param p output array
//! \param c input array
//! \param k subkey array
//! \details Copies the two input words to the output array and then applies
//!   TR83() to them R times, using subkeys k[R-1] down to k[0].
template <class W, unsigned int R>
inline void SPECK_Decrypt(W p[2], const W c[2], const W k[R])
{
    p[0]=c[0]; p[1]=c[1];

    // Don't unroll this loop. Things slow down.
    // Count down with a signed index so the loop terminates on i < 0 without
    // depending on unsigned wrap-around followed by a narrowing conversion.
    for (int i = static_cast<int>(R)-1; i >= 0; --i)
        TR83(p[0], p[1], k[i]);
}
|
|
|
|
//! \brief Subkey generation function
|
|
//! \details Used when the user key consists of 2 words
|
|
//! \tparam W word type
|
|
//! \tparam R number of rounds
|
|
//! \param key empty subkey array
|
|
//! \param k user key array
|
|
template <class W, unsigned int R>
|
|
inline void SPECK_ExpandKey_2W(W key[R], const W k[2])
|
|
{
|
|
CRYPTOPP_ASSERT(R==32);
|
|
W i=0, B=k[0], A=k[1];
|
|
|
|
while (i<R-1)
|
|
{
|
|
key[i]=A; TF83(B, A, i);
|
|
i++;
|
|
}
|
|
key[R-1]=A;
|
|
}
|
|
|
|
//! \brief Subkey generation function
|
|
//! \details Used when the user key consists of 3 words
|
|
//! \tparam W word type
|
|
//! \tparam R number of rounds
|
|
//! \param key empty subkey array
|
|
//! \param k user key array
|
|
template <class W, unsigned int R>
|
|
inline void SPECK_ExpandKey_3W(W key[R], const W k[3])
|
|
{
|
|
CRYPTOPP_ASSERT(R==33 || R==26);
|
|
W i=0, C=k[0], B=k[1], A=k[2];
|
|
|
|
unsigned int blocks = R/2;
|
|
while (blocks--)
|
|
{
|
|
key[i+0]=A; TF83(B, A, i+0);
|
|
key[i+1]=A; TF83(C, A, i+1);
|
|
i+=2;
|
|
}
|
|
|
|
// The constexpr residue should allow the optimizer to remove unneeded statements
|
|
if(R%2 == 1)
|
|
{
|
|
key[R-1]=A;
|
|
}
|
|
}
|
|
|
|
//! \brief Subkey generation function
|
|
//! \details Used when the user key consists of 4 words
|
|
//! \tparam W word type
|
|
//! \tparam R number of rounds
|
|
//! \param key empty subkey array
|
|
//! \param k user key array
|
|
template <class W, unsigned int R>
|
|
inline void SPECK_ExpandKey_4W(W key[R], const W k[4])
|
|
{
|
|
CRYPTOPP_ASSERT(R==34 || R==27);
|
|
W i=0, D=k[0], C=k[1], B=k[2], A=k[3];
|
|
|
|
unsigned int blocks = R/3;
|
|
while (blocks--)
|
|
{
|
|
key[i+0]=A; TF83(B, A, i+0);
|
|
key[i+1]=A; TF83(C, A, i+1);
|
|
key[i+2]=A; TF83(D, A, i+2);
|
|
i+=3;
|
|
}
|
|
|
|
// The constexpr residue should allow the optimizer to remove unneeded statements
|
|
if(R%3 == 1)
|
|
{
|
|
key[R-1]=A;
|
|
}
|
|
else if(R%3 == 2)
|
|
{
|
|
key[R-2]=A; TF83(B, A, W(R-2));
|
|
key[R-1]=A;
|
|
}
|
|
}
|
|
|
|
ANONYMOUS_NAMESPACE_END
|
|
|
|
///////////////////////////////////////////////////////////
|
|
|
|
NAMESPACE_BEGIN(CryptoPP)
|
|
|
|
#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
|
|
extern size_t SPECK128_Enc_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds,
|
|
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
|
|
|
|
extern size_t SPECK128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds,
|
|
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
|
|
#endif
|
|
|
|
#if defined(CRYPTOPP_SSSE3_AVAILABLE)
|
|
extern size_t SPECK128_Enc_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
|
|
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
|
|
|
|
extern size_t SPECK128_Dec_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
|
|
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
|
|
#endif
|
|
|
|
void SPECK64::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLength, const NameValuePairs ¶ms)
|
|
{
|
|
CRYPTOPP_ASSERT(keyLength == 12 || keyLength == 16);
|
|
CRYPTOPP_UNUSED(params);
|
|
|
|
// Building the key schedule table requires {3,4} words workspace.
|
|
// Encrypting and decrypting requires 4 words workspace.
|
|
m_kwords = keyLength/sizeof(word32);
|
|
m_wspace.New(STDMAX(m_kwords,4U));
|
|
GetUserKey(BIG_ENDIAN_ORDER, m_wspace.begin(), m_kwords, userKey, keyLength);
|
|
|
|
switch (m_kwords)
|
|
{
|
|
case 3:
|
|
m_rkeys.New(26);
|
|
m_rounds = 26;
|
|
SPECK_ExpandKey_3W<word32, 26>(m_rkeys, m_wspace);
|
|
break;
|
|
case 4:
|
|
m_rkeys.New(27);
|
|
m_rounds = 27;
|
|
SPECK_ExpandKey_4W<word32, 27>(m_rkeys, m_wspace);
|
|
break;
|
|
default:
|
|
CRYPTOPP_ASSERT(0);;
|
|
}
|
|
}
|
|
|
|
void SPECK64::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
|
|
{
|
|
// Reverse bytes on LittleEndian; align pointer on BigEndian
|
|
typedef GetBlock<word32, BigEndian, false> InBlock;
|
|
InBlock iblk(inBlock); iblk(m_wspace[0])(m_wspace[1]);
|
|
|
|
switch (m_rounds)
|
|
{
|
|
case 26:
|
|
SPECK_Encrypt<word32, 26>(m_wspace+2, m_wspace+0, m_rkeys);
|
|
break;
|
|
case 27:
|
|
SPECK_Encrypt<word32, 27>(m_wspace+2, m_wspace+0, m_rkeys);
|
|
break;
|
|
default:
|
|
CRYPTOPP_ASSERT(0);;
|
|
}
|
|
|
|
// Reverse bytes on LittleEndian; align pointer on BigEndian
|
|
typedef PutBlock<word32, BigEndian, false> OutBlock;
|
|
OutBlock oblk(xorBlock, outBlock); oblk(m_wspace[2])(m_wspace[3]);
|
|
}
|
|
|
|
void SPECK64::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
|
|
{
|
|
// Reverse bytes on LittleEndian; align pointer on BigEndian
|
|
typedef GetBlock<word32, BigEndian, false> InBlock;
|
|
InBlock iblk(inBlock); iblk(m_wspace[0])(m_wspace[1]);
|
|
|
|
switch (m_rounds)
|
|
{
|
|
case 26:
|
|
SPECK_Decrypt<word32, 26>(m_wspace+2, m_wspace+0, m_rkeys);
|
|
break;
|
|
case 27:
|
|
SPECK_Decrypt<word32, 27>(m_wspace+2, m_wspace+0, m_rkeys);
|
|
break;
|
|
default:
|
|
CRYPTOPP_ASSERT(0);;
|
|
}
|
|
|
|
// Reverse bytes on LittleEndian; align pointer on BigEndian
|
|
typedef PutBlock<word32, BigEndian, false> OutBlock;
|
|
OutBlock oblk(xorBlock, outBlock); oblk(m_wspace[2])(m_wspace[3]);
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////
|
|
|
|
void SPECK128::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLength, const NameValuePairs ¶ms)
|
|
{
|
|
CRYPTOPP_ASSERT(keyLength == 16 || keyLength == 24 || keyLength == 32);
|
|
CRYPTOPP_UNUSED(params);
|
|
|
|
// Building the key schedule table requires {2,3,4} words workspace.
|
|
// Encrypting and decrypting requires 4 words workspace.
|
|
m_kwords = keyLength/sizeof(word64);
|
|
m_wspace.New(STDMAX(m_kwords,4U));
|
|
GetUserKey(BIG_ENDIAN_ORDER, m_wspace.begin(), m_kwords, userKey, keyLength);
|
|
|
|
switch (m_kwords)
|
|
{
|
|
case 2:
|
|
m_rkeys.New(32);
|
|
m_rounds = 32;
|
|
SPECK_ExpandKey_2W<word64, 32>(m_rkeys, m_wspace);
|
|
break;
|
|
case 3:
|
|
m_rkeys.New(33);
|
|
m_rounds = 33;
|
|
SPECK_ExpandKey_3W<word64, 33>(m_rkeys, m_wspace);
|
|
break;
|
|
case 4:
|
|
m_rkeys.New(34);
|
|
m_rounds = 34;
|
|
SPECK_ExpandKey_4W<word64, 34>(m_rkeys, m_wspace);
|
|
break;
|
|
default:
|
|
CRYPTOPP_ASSERT(0);;
|
|
}
|
|
}
|
|
|
|
void SPECK128::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
|
|
{
|
|
// Reverse bytes on LittleEndian; align pointer on BigEndian
|
|
typedef GetBlock<word64, BigEndian, false> InBlock;
|
|
InBlock iblk(inBlock); iblk(m_wspace[0])(m_wspace[1]);
|
|
|
|
switch (m_rounds)
|
|
{
|
|
case 32:
|
|
SPECK_Encrypt<word64, 32>(m_wspace+2, m_wspace+0, m_rkeys);
|
|
break;
|
|
case 33:
|
|
SPECK_Encrypt<word64, 33>(m_wspace+2, m_wspace+0, m_rkeys);
|
|
break;
|
|
case 34:
|
|
SPECK_Encrypt<word64, 34>(m_wspace+2, m_wspace+0, m_rkeys);
|
|
break;
|
|
default:
|
|
CRYPTOPP_ASSERT(0);;
|
|
}
|
|
|
|
// Reverse bytes on LittleEndian; align pointer on BigEndian
|
|
typedef PutBlock<word64, BigEndian, false> OutBlock;
|
|
OutBlock oblk(xorBlock, outBlock); oblk(m_wspace[2])(m_wspace[3]);
|
|
}
|
|
|
|
void SPECK128::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
|
|
{
|
|
// Reverse bytes on LittleEndian; align pointer on BigEndian
|
|
typedef GetBlock<word64, BigEndian, false> InBlock;
|
|
InBlock iblk(inBlock); iblk(m_wspace[0])(m_wspace[1]);
|
|
|
|
switch (m_rounds)
|
|
{
|
|
case 32:
|
|
SPECK_Decrypt<word64, 32>(m_wspace+2, m_wspace+0, m_rkeys);
|
|
break;
|
|
case 33:
|
|
SPECK_Decrypt<word64, 33>(m_wspace+2, m_wspace+0, m_rkeys);
|
|
break;
|
|
case 34:
|
|
SPECK_Decrypt<word64, 34>(m_wspace+2, m_wspace+0, m_rkeys);
|
|
break;
|
|
default:
|
|
CRYPTOPP_ASSERT(0);;
|
|
}
|
|
|
|
// Reverse bytes on LittleEndian; align pointer on BigEndian
|
|
typedef PutBlock<word64, BigEndian, false> OutBlock;
|
|
OutBlock oblk(xorBlock, outBlock); oblk(m_wspace[2])(m_wspace[3]);
|
|
}
|
|
|
|
#if defined(CRYPTOPP_SPECK_ADVANCED_PROCESS_BLOCKS)
|
|
// Multi-block encryption entry point. Forwards to the SSSE3 or NEON routine when
// that path is compiled in and the matching Has*() check returns true; otherwise
// falls back to BlockTransformation::AdvancedProcessBlocks. All arguments are
// passed through unchanged, together with the round keys and round count.
size_t SPECK128::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
        byte *outBlocks, size_t length, word32 flags) const
{
#if defined(CRYPTOPP_SSSE3_AVAILABLE)
    if (HasSSSE3())
    {
        return SPECK128_Enc_AdvancedProcessBlocks_SSSE3(m_rkeys, static_cast<size_t>(m_rounds),
            inBlocks, xorBlocks, outBlocks, length, flags);
    }
#endif

#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
    if (HasNEON())
    {
        return SPECK128_Enc_AdvancedProcessBlocks_NEON(m_rkeys, static_cast<size_t>(m_rounds),
            inBlocks, xorBlocks, outBlocks, length, flags);
    }
#endif

    return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
}
|
|
|
|
// Multi-block decryption entry point. Forwards to the SSSE3 or NEON routine when
// that path is compiled in and the matching Has*() check returns true; otherwise
// falls back to BlockTransformation::AdvancedProcessBlocks. All arguments are
// passed through unchanged, together with the round keys and round count.
size_t SPECK128::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
        byte *outBlocks, size_t length, word32 flags) const
{
#if defined(CRYPTOPP_SSSE3_AVAILABLE)
    if (HasSSSE3())
    {
        return SPECK128_Dec_AdvancedProcessBlocks_SSSE3(m_rkeys, static_cast<size_t>(m_rounds),
            inBlocks, xorBlocks, outBlocks, length, flags);
    }
#endif

#if defined(CRYPTOPP_ARM_NEON_AVAILABLE)
    if (HasNEON())
    {
        return SPECK128_Dec_AdvancedProcessBlocks_NEON(m_rkeys, static_cast<size_t>(m_rounds),
            inBlocks, xorBlocks, outBlocks, length, flags);
    }
#endif

    return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
}
|
|
#endif
|
|
|
|
NAMESPACE_END
|