ext-cryptopp/speck.cpp
Jeffrey Walton 39418a8512
Use PowerPC unaligned loads and stores with Power8 (GH #825, PR #826)
Use PowerPC unaligned loads and stores with Power8. Formerly we were using Power7 as the floor because the IBM POWER Architecture manuals said unaligned loads and stores were available. However, some compilers generate bad code for unaligned loads and stores using `-march=power7`, so bump to a known good.
2019-04-27 20:35:01 -04:00

531 lines
18 KiB
C++

// speck.cpp - written and placed in the public domain by Jeffrey Walton
#include "pch.h"
#include "config.h"
#include "speck.h"
#include "misc.h"
#include "cpu.h"
#ifndef CRYPTOPP_INLINE
# if defined(CRYPTOPP_DEBUG)
# define CRYPTOPP_INLINE static
# else
# define CRYPTOPP_INLINE inline
# endif
#endif
// Uncomment for benchmarking C++ against SSE or NEON.
// Do so in both speck.cpp and speck-simd.cpp.
// #undef CRYPTOPP_SSSE3_AVAILABLE
// #undef CRYPTOPP_SSE41_AVAILABLE
// #undef CRYPTOPP_ARM_NEON_AVAILABLE
ANONYMOUS_NAMESPACE_BEGIN
using CryptoPP::word32;
using CryptoPP::word64;
using CryptoPP::rotlConstant;
using CryptoPP::rotrConstant;
/// \brief Forward round transformation
/// \tparam W word type
/// \param x first state word, updated in place
/// \param y second state word, updated in place
/// \param k subkey mixed into this round
/// \details TF83() performs one forward round with the rotation amounts fixed
///  at a=8 and b=3. An early test implementation exposed the rotation amounts
///  as template parameters. They were dropped because SPECK32, which uses a=7
///  and b=2, was not on the road map, and because the extra parameters made
///  calls to SPECK_Encrypt and SPECK_Decrypt kind of messy.
template <class W>
CRYPTOPP_INLINE void TF83(W& x, W& y, const W k)
{
    // Rotate x right by 8, add y, then mix in the subkey.
    x = (rotrConstant<8>(x) + y) ^ k;
    // Rotate y left by 3 and fold in the x computed just above.
    y = rotlConstant<3>(y) ^ x;
}
/// \brief Reverse round transformation
/// \tparam W word type
/// \param x first state word, updated in place
/// \param y second state word, updated in place
/// \param k subkey that was mixed into the matching forward round
/// \details TR83() undoes one TF83() round with the rotation amounts fixed at
///  a=8 and b=3. An early test implementation exposed the rotation amounts as
///  template parameters. They were dropped because SPECK32, which uses a=7 and
///  b=2, was not on the road map, and because the extra parameters made calls
///  to SPECK_Encrypt and SPECK_Decrypt kind of messy.
template <class W>
CRYPTOPP_INLINE void TR83(W& x, W& y, const W k)
{
    // Strip x out of y, then undo the left rotation by 3.
    const W unfolded = y ^ x;
    y = rotrConstant<3>(unfolded);

    // Strip the subkey out of x, subtract the restored y, then undo the
    // right rotation by 8.
    const W unmixed = (x ^ k) - y;
    x = rotlConstant<8>(unmixed);
}
/// \brief Forward transformation
/// \tparam W word type
/// \tparam R number of rounds
/// \param c output array, receives the two transformed words
/// \param p input array, holds the two words to transform
/// \param k subkey array, one subkey per round
/// \details The two input words are copied into <tt>c</tt> and TF83() is then
///  applied to them in place R times, taking the subkeys in ascending order.
template <class W, unsigned int R>
CRYPTOPP_INLINE void SPECK_Encrypt(W c[2], const W p[2], const W k[R])
{
    W& x = c[0];
    W& y = c[1];
    x = p[0];
    y = p[1];

    // Leave this as a plain loop. Unrolling it slows things down.
    for (unsigned int round = 0; round < R; ++round)
        TF83(x, y, k[round]);
}
/// \brief Reverse transformation
/// \tparam W word type
/// \tparam R number of rounds
/// \param p output array, receives the two recovered words
/// \param c input array, holds the two words to transform
/// \param k subkey array, one subkey per round
/// \details The two input words are copied into <tt>p</tt> and TR83() is then
///  applied to them in place R times, taking the subkeys in descending order.
template <class W, unsigned int R>
CRYPTOPP_INLINE void SPECK_Decrypt(W p[2], const W c[2], const W k[R])
{
    W& x = p[0];
    W& y = p[1];
    x = c[0];
    y = c[1];

    // Leave this as a plain loop. Unrolling it slows things down.
    for (unsigned int remaining = R; remaining != 0; --remaining)
        TR83(x, y, k[remaining-1]);
}
/// \brief Subkey generation function
/// \details Used when the user key consists of 2 words. Each subkey is the
///  current value of the running word <tt>k[1]</tt>, which is then advanced by
///  one TF83() round together with <tt>k[0]</tt>, using the subkey index as the
///  round key.
/// \tparam W word type
/// \tparam R number of rounds
/// \param key empty subkey array, receives R subkeys
/// \param k user key array
template <class W, unsigned int R>
CRYPTOPP_INLINE void SPECK_ExpandKey_2W(W key[R], const W k[2])
{
    CRYPTOPP_ASSERT(R==32);

    W mix = k[0];
    W sub = k[1];

    // Emit all but the last subkey, advancing the state after each one.
    for (W i = 0; i < R-1; ++i)
    {
        key[i] = sub;
        TF83(mix, sub, i);
    }

    // The last subkey needs no further advance.
    key[R-1] = sub;
}
/// \brief Subkey generation function
/// \details Used when the user key consists of 3 words. Subkeys are produced
///  two at a time: the running word <tt>k[2]</tt> is stored and then advanced
///  with TF83(), alternating between <tt>k[1]</tt> and <tt>k[0]</tt> as the
///  companion word and using the subkey index as the round key.
/// \tparam W word type
/// \tparam R number of rounds
/// \param key empty subkey array, receives R subkeys
/// \param k user key array
template <class W, unsigned int R>
CRYPTOPP_INLINE void SPECK_ExpandKey_3W(W key[R], const W k[3])
{
    CRYPTOPP_ASSERT(R==33 || R==26);

    W sub = k[2];
    W mixA = k[1];
    W mixB = k[0];

    // Each pass emits two subkeys; i is the index of the first of the pair.
    W i = 0;
    for (unsigned int pairs = R/2; pairs != 0; --pairs, i += 2)
    {
        key[i] = sub;
        TF83(mixA, sub, i);
        key[i+1] = sub;
        TF83(mixB, sub, i+1);
    }

    // R is a compile-time constant, so the optimizer should be able to
    // discard this test and, for even R, the statement it guards.
    if (R%2 == 1)
        key[R-1] = sub;
}
/// \brief Subkey generation function
/// \details Used when the user key consists of 4 words. Subkeys are produced
///  three at a time: the running word <tt>k[3]</tt> is stored and then advanced
///  with TF83(), cycling through <tt>k[2]</tt>, <tt>k[1]</tt> and <tt>k[0]</tt>
///  as the companion word and using the subkey index as the round key. Any
///  subkeys left over when R is not a multiple of 3 are produced afterwards.
/// \tparam W word type
/// \tparam R number of rounds
/// \param key empty subkey array, receives R subkeys
/// \param k user key array
template <class W, unsigned int R>
CRYPTOPP_INLINE void SPECK_ExpandKey_4W(W key[R], const W k[4])
{
    CRYPTOPP_ASSERT(R==34 || R==27);

    W sub = k[3];
    W mixA = k[2];
    W mixB = k[1];
    W mixC = k[0];

    // Each pass emits three subkeys; i is the index of the first of them.
    W i = 0;
    for (unsigned int triples = R/3; triples != 0; --triples, i += 3)
    {
        key[i] = sub;
        TF83(mixA, sub, i);
        key[i+1] = sub;
        TF83(mixB, sub, i+1);
        key[i+2] = sub;
        TF83(mixC, sub, i+2);
    }

    // R is a compile-time constant, so the optimizer should be able to
    // discard the branches that do not apply.
    switch (R%3)
    {
    case 1:
        // One subkey left; no further advance is needed.
        key[R-1] = sub;
        break;
    case 2:
        // Two subkeys left; advance once between them.
        key[R-2] = sub;
        TF83(mixA, sub, W(R-2));
        key[R-1] = sub;
        break;
    default:
        break;
    }
}
ANONYMOUS_NAMESPACE_END
///////////////////////////////////////////////////////////
NAMESPACE_BEGIN(CryptoPP)
// Declarations of the accelerated bulk-processing routines that the
// AdvancedProcessBlocks() members below dispatch to. Each group is declared
// only when the matching *_AVAILABLE macro is set. The routines are not defined
// in this file; presumably they live in the companion SIMD source file.
// All of them share one parameter list: the subkey table, the number of
// rounds, and then the inBlocks/xorBlocks/outBlocks/length/flags arguments that
// AdvancedProcessBlocks() receives and forwards unchanged.

// ARM NEON: SPECK-64 (word32 subkeys) and SPECK-128 (word64 subkeys)
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
extern size_t SPECK64_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
extern size_t SPECK64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
extern size_t SPECK128_Enc_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
extern size_t SPECK128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
#endif
// SSE4.1: SPECK-64 only
#if defined(CRYPTOPP_SSE41_AVAILABLE)
extern size_t SPECK64_Enc_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
extern size_t SPECK64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
#endif
// SSSE3: SPECK-128 only
#if defined(CRYPTOPP_SSSE3_AVAILABLE)
extern size_t SPECK128_Enc_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
extern size_t SPECK128_Dec_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
#endif
// Altivec: SPECK-64 only
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
extern size_t SPECK64_Enc_AdvancedProcessBlocks_ALTIVEC(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
extern size_t SPECK64_Dec_AdvancedProcessBlocks_ALTIVEC(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
#endif
// POWER8: SPECK-128 only
#if defined(CRYPTOPP_POWER8_AVAILABLE)
extern size_t SPECK128_Enc_AdvancedProcessBlocks_POWER8(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
extern size_t SPECK128_Dec_AdvancedProcessBlocks_POWER8(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
#endif
// Returns the name of the implementation reported for SPECK-64. When the
// advanced block processing is compiled in, the instruction sets are probed in
// the order below and the first one whose Has*() check succeeds is named;
// otherwise, or when none succeeds, the answer is "C++".
// NOTE(review): no SPECK64 *_POWER8 routine is declared in this file, so
// "Power8" presumably describes the ISA the Altivec routines run with - confirm.
std::string SPECK64::Base::AlgorithmProvider() const
{
#if (CRYPTOPP_SPECK64_ADVANCED_PROCESS_BLOCKS)
# if (CRYPTOPP_SSE41_AVAILABLE)
    if (HasSSE41())
    {
        return "SSE4.1";
    }
# endif
# if (CRYPTOPP_ARM_NEON_AVAILABLE)
    if (HasNEON())
    {
        return "NEON";
    }
# endif
# if (CRYPTOPP_POWER8_AVAILABLE)
    // Probed ahead of Altivec, so Power8 wins when both checks succeed.
    if (HasPower8())
    {
        return "Power8";
    }
# endif
# if (CRYPTOPP_ALTIVEC_AVAILABLE)
    if (HasAltivec())
    {
        return "Altivec";
    }
# endif
#endif
    // No accelerated implementation was selected.
    return "C++";
}
void SPECK64::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLength, const NameValuePairs &params)
{
CRYPTOPP_ASSERT(keyLength == 12 || keyLength == 16);
CRYPTOPP_UNUSED(params);
// Building the key schedule table requires {3,4} words workspace.
// Encrypting and decrypting requires 4 words workspace.
m_kwords = keyLength/sizeof(word32);
m_wspace.New(4U);
// Do the endian gyrations from the paper and align pointers
typedef GetBlock<word32, LittleEndian> KeyBlock;
KeyBlock kblk(userKey);
switch (m_kwords)
{
case 3:
m_rkeys.New((m_rounds = 26));
kblk(m_wspace[2])(m_wspace[1])(m_wspace[0]);
SPECK_ExpandKey_3W<word32, 26>(m_rkeys, m_wspace);
break;
case 4:
m_rkeys.New((m_rounds = 27));
kblk(m_wspace[3])(m_wspace[2])(m_wspace[1])(m_wspace[0]);
SPECK_ExpandKey_4W<word32, 27>(m_rkeys, m_wspace);
break;
default:
CRYPTOPP_ASSERT(0);;
}
// Altivec loads the current subkey as a 16-byte vector
// The extra elements ensure memory backs the last subkey.
#if CRYPTOPP_ALTIVEC_AVAILABLE
m_rkeys.Grow(m_rkeys.size()+4);
#endif
}
void SPECK64::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
// Do the endian gyrations from the paper and align pointers
typedef GetBlock<word32, LittleEndian> InBlock;
InBlock iblk(inBlock); iblk(m_wspace[1])(m_wspace[0]);
switch (m_rounds)
{
case 26:
SPECK_Encrypt<word32, 26>(m_wspace+2, m_wspace+0, m_rkeys);
break;
case 27:
SPECK_Encrypt<word32, 27>(m_wspace+2, m_wspace+0, m_rkeys);
break;
default:
CRYPTOPP_ASSERT(0);;
}
// Do the endian gyrations from the paper and align pointers
typedef PutBlock<word32, LittleEndian> OutBlock;
OutBlock oblk(xorBlock, outBlock); oblk(m_wspace[3])(m_wspace[2]);
}
void SPECK64::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
// Do the endian gyrations from the paper and align pointers
typedef GetBlock<word32, LittleEndian> InBlock;
InBlock iblk(inBlock); iblk(m_wspace[1])(m_wspace[0]);
switch (m_rounds)
{
case 26:
SPECK_Decrypt<word32, 26>(m_wspace+2, m_wspace+0, m_rkeys);
break;
case 27:
SPECK_Decrypt<word32, 27>(m_wspace+2, m_wspace+0, m_rkeys);
break;
default:
CRYPTOPP_ASSERT(0);;
}
// Do the endian gyrations from the paper and align pointers
typedef PutBlock<word32, LittleEndian> OutBlock;
OutBlock oblk(xorBlock, outBlock); oblk(m_wspace[3])(m_wspace[2]);
}
///////////////////////////////////////////////////////////
// Returns the name of the implementation reported for SPECK-128. When the
// advanced block processing is compiled in, the instruction sets are probed in
// the order below and the first one whose Has*() check succeeds is named;
// otherwise, or when none succeeds, the answer is "C++".
std::string SPECK128::Base::AlgorithmProvider() const
{
#if (CRYPTOPP_SPECK128_ADVANCED_PROCESS_BLOCKS)
# if (CRYPTOPP_SSSE3_AVAILABLE)
    if (HasSSSE3())
    {
        return "SSSE3";
    }
# endif
# if (CRYPTOPP_ARM_NEON_AVAILABLE)
    if (HasNEON())
    {
        return "NEON";
    }
# endif
# if (CRYPTOPP_POWER8_AVAILABLE)
    if (HasPower8())
    {
        return "Power8";
    }
# endif
#endif
    // No accelerated implementation was selected.
    return "C++";
}
void SPECK128::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLength, const NameValuePairs &params)
{
CRYPTOPP_ASSERT(keyLength == 16 || keyLength == 24 || keyLength == 32);
CRYPTOPP_UNUSED(params);
// Building the key schedule table requires {2,3,4} words workspace.
// Encrypting and decrypting requires 4 words workspace.
m_kwords = keyLength/sizeof(word64);
m_wspace.New(4U);
// Do the endian gyrations from the paper and align pointers
typedef GetBlock<word64, LittleEndian> KeyBlock;
KeyBlock kblk(userKey);
switch (m_kwords)
{
case 2:
m_rkeys.New((m_rounds = 32));
kblk(m_wspace[1])(m_wspace[0]);
SPECK_ExpandKey_2W<word64, 32>(m_rkeys, m_wspace);
break;
case 3:
m_rkeys.New((m_rounds = 33));
kblk(m_wspace[2])(m_wspace[1])(m_wspace[0]);
SPECK_ExpandKey_3W<word64, 33>(m_rkeys, m_wspace);
break;
case 4:
m_rkeys.New((m_rounds = 34));
kblk(m_wspace[3])(m_wspace[2])(m_wspace[1])(m_wspace[0]);
SPECK_ExpandKey_4W<word64, 34>(m_rkeys, m_wspace);
break;
default:
CRYPTOPP_ASSERT(0);;
}
}
void SPECK128::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
// Do the endian gyrations from the paper and align pointers
typedef GetBlock<word64, LittleEndian> InBlock;
InBlock iblk(inBlock); iblk(m_wspace[1])(m_wspace[0]);
switch (m_rounds)
{
case 32:
SPECK_Encrypt<word64, 32>(m_wspace+2, m_wspace+0, m_rkeys);
break;
case 33:
SPECK_Encrypt<word64, 33>(m_wspace+2, m_wspace+0, m_rkeys);
break;
case 34:
SPECK_Encrypt<word64, 34>(m_wspace+2, m_wspace+0, m_rkeys);
break;
default:
CRYPTOPP_ASSERT(0);;
}
// Do the endian gyrations from the paper and align pointers
typedef PutBlock<word64, LittleEndian> OutBlock;
OutBlock oblk(xorBlock, outBlock); oblk(m_wspace[3])(m_wspace[2]);
}
void SPECK128::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
// Do the endian gyrations from the paper and align pointers
typedef GetBlock<word64, LittleEndian> InBlock;
InBlock iblk(inBlock); iblk(m_wspace[1])(m_wspace[0]);
switch (m_rounds)
{
case 32:
SPECK_Decrypt<word64, 32>(m_wspace+2, m_wspace+0, m_rkeys);
break;
case 33:
SPECK_Decrypt<word64, 33>(m_wspace+2, m_wspace+0, m_rkeys);
break;
case 34:
SPECK_Decrypt<word64, 34>(m_wspace+2, m_wspace+0, m_rkeys);
break;
default:
CRYPTOPP_ASSERT(0);;
}
// Do the endian gyrations from the paper and align pointers
typedef PutBlock<word64, LittleEndian> OutBlock;
OutBlock oblk(xorBlock, outBlock); oblk(m_wspace[3])(m_wspace[2]);
}
#if defined(CRYPTOPP_SPECK64_ADVANCED_PROCESS_BLOCKS)
// Bulk SPECK-64 encryption. Hands the request to the first accelerated routine
// that is compiled in and whose Has*() check succeeds, passing the subkey
// table and round count along with the caller's arguments. When no such
// routine applies, the BlockTransformation implementation is used instead.
size_t SPECK64::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
    byte *outBlocks, size_t length, word32 flags) const
{
#if defined(CRYPTOPP_SSE41_AVAILABLE)
    if (HasSSE41())
    {
        return SPECK64_Enc_AdvancedProcessBlocks_SSE41(m_rkeys, static_cast<size_t>(m_rounds),
            inBlocks, xorBlocks, outBlocks, length, flags);
    }
#endif
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
    if (HasNEON())
    {
        return SPECK64_Enc_AdvancedProcessBlocks_NEON(m_rkeys, static_cast<size_t>(m_rounds),
            inBlocks, xorBlocks, outBlocks, length, flags);
    }
#endif
#if (CRYPTOPP_ALTIVEC_AVAILABLE)
    if (HasAltivec())
    {
        return SPECK64_Enc_AdvancedProcessBlocks_ALTIVEC(m_rkeys, static_cast<size_t>(m_rounds),
            inBlocks, xorBlocks, outBlocks, length, flags);
    }
#endif
    // Generic fallback
    return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
}
// Bulk SPECK-64 decryption. Hands the request to the first accelerated routine
// that is compiled in and whose Has*() check succeeds, passing the subkey
// table and round count along with the caller's arguments. When no such
// routine applies, the BlockTransformation implementation is used instead.
size_t SPECK64::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
    byte *outBlocks, size_t length, word32 flags) const
{
#if defined(CRYPTOPP_SSE41_AVAILABLE)
    if (HasSSE41())
    {
        return SPECK64_Dec_AdvancedProcessBlocks_SSE41(m_rkeys, static_cast<size_t>(m_rounds),
            inBlocks, xorBlocks, outBlocks, length, flags);
    }
#endif
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
    if (HasNEON())
    {
        return SPECK64_Dec_AdvancedProcessBlocks_NEON(m_rkeys, static_cast<size_t>(m_rounds),
            inBlocks, xorBlocks, outBlocks, length, flags);
    }
#endif
#if (CRYPTOPP_ALTIVEC_AVAILABLE)
    if (HasAltivec())
    {
        return SPECK64_Dec_AdvancedProcessBlocks_ALTIVEC(m_rkeys, static_cast<size_t>(m_rounds),
            inBlocks, xorBlocks, outBlocks, length, flags);
    }
#endif
    // Generic fallback
    return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_SPECK64_ADVANCED_PROCESS_BLOCKS
#if defined(CRYPTOPP_SPECK128_ADVANCED_PROCESS_BLOCKS)
// Bulk SPECK-128 encryption. Hands the request to the first accelerated
// routine that is compiled in and whose Has*() check succeeds, passing the
// subkey table and round count along with the caller's arguments. When no such
// routine applies, the BlockTransformation implementation is used instead.
size_t SPECK128::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
    byte *outBlocks, size_t length, word32 flags) const
{
#if defined(CRYPTOPP_SSSE3_AVAILABLE)
    if (HasSSSE3())
    {
        return SPECK128_Enc_AdvancedProcessBlocks_SSSE3(m_rkeys, static_cast<size_t>(m_rounds),
            inBlocks, xorBlocks, outBlocks, length, flags);
    }
#endif
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
    if (HasNEON())
    {
        return SPECK128_Enc_AdvancedProcessBlocks_NEON(m_rkeys, static_cast<size_t>(m_rounds),
            inBlocks, xorBlocks, outBlocks, length, flags);
    }
#endif
#if (CRYPTOPP_POWER8_AVAILABLE)
    if (HasPower8())
    {
        return SPECK128_Enc_AdvancedProcessBlocks_POWER8(m_rkeys, static_cast<size_t>(m_rounds),
            inBlocks, xorBlocks, outBlocks, length, flags);
    }
#endif
    // Generic fallback
    return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
}
// Bulk SPECK-128 decryption. Hands the request to the first accelerated
// routine that is compiled in and whose Has*() check succeeds, passing the
// subkey table and round count along with the caller's arguments. When no such
// routine applies, the BlockTransformation implementation is used instead.
size_t SPECK128::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks,
    byte *outBlocks, size_t length, word32 flags) const
{
#if defined(CRYPTOPP_SSSE3_AVAILABLE)
    if (HasSSSE3())
    {
        return SPECK128_Dec_AdvancedProcessBlocks_SSSE3(m_rkeys, static_cast<size_t>(m_rounds),
            inBlocks, xorBlocks, outBlocks, length, flags);
    }
#endif
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
    if (HasNEON())
    {
        return SPECK128_Dec_AdvancedProcessBlocks_NEON(m_rkeys, static_cast<size_t>(m_rounds),
            inBlocks, xorBlocks, outBlocks, length, flags);
    }
#endif
#if (CRYPTOPP_POWER8_AVAILABLE)
    if (HasPower8())
    {
        return SPECK128_Dec_AdvancedProcessBlocks_POWER8(m_rkeys, static_cast<size_t>(m_rounds),
            inBlocks, xorBlocks, outBlocks, length, flags);
    }
#endif
    // Generic fallback
    return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_SPECK128_ADVANCED_PROCESS_BLOCKS
NAMESPACE_END