Move Altivec AdvancedProcessBlocks into adv-simd.h

This commit is contained in:
Jeffrey Walton 2018-01-02 07:08:13 -05:00
parent fb0ecfde62
commit fac3a44a84
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
6 changed files with 192 additions and 166 deletions

View File

@ -8,16 +8,18 @@
//
// There are 8 templates provided in this file. The number following the
// function name is the block size of the cipher. The name following that
// is the acceleration and arrangement. For example SSE1x4 means Intel SSE
// using two encrypt (or decrypt) functions: one that operates on 1 block,
// and one that operates on 4 blocks.
// is the acceleration and arrangement. For example 4x1_SSE means Intel SSE
// using two encrypt (or decrypt) functions: one that operates on 4 blocks,
// and one that operates on 1 block.
//
// * AdvancedProcessBlocks64_SSE1x4
// * AdvancedProcessBlocks128_SSE1x4
// * AdvancedProcessBlocks64_SSE2x6
// * AdvancedProcessBlocks128_SSE2x6
// * AdvancedProcessBlocks64_NEON2x6
// * AdvancedProcessBlocks128_NEON2x6
// * AdvancedProcessBlocks64_4x1_SSE
// * AdvancedProcessBlocks128_4x1_SSE
// * AdvancedProcessBlocks64_6x2_SSE
// * AdvancedProcessBlocks128_6x2_SSE
// * AdvancedProcessBlocks64_6x2_NEON
// * AdvancedProcessBlocks128_6x2_NEON
// * AdvancedProcessBlocks64_6x2_ALTIVEC
// * AdvancedProcessBlocks128_6x1_ALTIVEC
//
#ifndef CRYPTOPP_ADVANCED_SIMD_TEMPLATES
@ -36,6 +38,10 @@
# include <tmmintrin.h>
#endif
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
# include "ppc-simd.h"
#endif
// https://www.spinics.net/lists/gcchelp/msg47735.html and
// https://www.spinics.net/lists/gcchelp/msg47749.html
#if (CRYPTOPP_GCC_VERSION >= 40900)
@ -88,7 +94,7 @@ ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP)
template <typename F2, typename F6>
inline size_t AdvancedProcessBlocks64_NEON2x6(F2 func2, F6 func6,
inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
const word32 *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@ -455,7 +461,7 @@ size_t AdvancedProcessBlocks128_NEON1x6(F1 func1, F6 func6,
}
template <typename F2, typename F6>
size_t AdvancedProcessBlocks128_NEON2x6(F2 func2, F6 func6,
size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
const word64 *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@ -690,7 +696,7 @@ ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP)
template <typename F2, typename F6>
inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_SSE2x6(F2 func2, F6 func6,
inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_6x2_SSE(F2 func2, F6 func6,
const word32 *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@ -924,7 +930,7 @@ inline size_t GCC_NO_UBSAN AdvancedProcessBlocks64_SSE2x6(F2 func2, F6 func6,
}
template <typename F2, typename F6>
inline size_t AdvancedProcessBlocks128_SSE2x6(F2 func2, F6 func6,
inline size_t AdvancedProcessBlocks128_6x2_SSE(F2 func2, F6 func6,
const word64 *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@ -1109,7 +1115,7 @@ inline size_t AdvancedProcessBlocks128_SSE2x6(F2 func2, F6 func6,
}
template <typename F1, typename F4>
inline size_t AdvancedProcessBlocks128_SSE1x4(F1 func1, F4 func4,
inline size_t AdvancedProcessBlocks128_4x1_SSE(F1 func1, F4 func4,
MAYBE_CONST word32 *subKeys, size_t rounds, const byte *inBlocks,
const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
@ -1232,4 +1238,144 @@ NAMESPACE_END // CryptoPP
#endif // CRYPTOPP_SSSE3_AVAILABLE
// *********************** Altivec/Power 4 ********************** //
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE)
ANONYMOUS_NAMESPACE_BEGIN
using CryptoPP::uint32x4_p;
// s_one is a vector of four 32-bit elements with exactly one element set to 1:
// element 0 when CRYPTOPP_LITTLE_ENDIAN is defined, element 3 otherwise.
// AdvancedProcessBlocks128_6x1_ALTIVEC adds it to a counter block to derive
// the next counter value in its 6-block path.
// NOTE(review): the element position presumably matches the byte order that
// VectorLoad produces on each endianness -- confirm against ppc-simd.h.
#if defined(CRYPTOPP_LITTLE_ENDIAN)
const uint32x4_p s_one = {1,0,0,0};
#else
const uint32x4_p s_one = {0,0,0,1};
#endif
ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP)
template <typename F1, typename F6>
size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, const word32 *subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
CRYPTOPP_ASSERT(subKeys);
CRYPTOPP_ASSERT(inBlocks);
CRYPTOPP_ASSERT(outBlocks);
CRYPTOPP_ASSERT(length >= 16);
const ptrdiff_t blockSize = 16;
ptrdiff_t inIncrement = (flags & (BT_InBlockIsCounter|BT_DontIncrementInOutPointers)) ? 0 : blockSize;
ptrdiff_t xorIncrement = xorBlocks ? blockSize : 0;
ptrdiff_t outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : blockSize;
if (flags & BT_ReverseDirection)
{
inBlocks += length - blockSize;
xorBlocks += length - blockSize;
outBlocks += length - blockSize;
inIncrement = 0-inIncrement;
xorIncrement = 0-xorIncrement;
outIncrement = 0-outIncrement;
}
if (flags & BT_AllowParallel)
{
while (length >= 6*blockSize)
{
VectorType block0, block1, block2, block3, block4, block5, temp;
block0 = VectorLoad(inBlocks);
if (flags & BT_InBlockIsCounter)
{
block1 = VectorAdd(block0, s_one);
block2 = VectorAdd(block1, s_one);
block3 = VectorAdd(block2, s_one);
block4 = VectorAdd(block3, s_one);
block5 = VectorAdd(block4, s_one);
temp = VectorAdd(block5, s_one);
VectorStore(temp, const_cast<byte*>(inBlocks));
}
else
{
const int inc = static_cast<int>(inIncrement);
block1 = VectorLoad(1*inc, inBlocks);
block2 = VectorLoad(2*inc, inBlocks);
block3 = VectorLoad(3*inc, inBlocks);
block4 = VectorLoad(4*inc, inBlocks);
block5 = VectorLoad(5*inc, inBlocks);
inBlocks += 6*inc;
}
if (flags & BT_XorInput)
{
const int inc = static_cast<int>(xorIncrement);
block0 = VectorXor(block0, VectorLoad(0*inc, xorBlocks));
block1 = VectorXor(block1, VectorLoad(1*inc, xorBlocks));
block2 = VectorXor(block2, VectorLoad(2*inc, xorBlocks));
block3 = VectorXor(block3, VectorLoad(3*inc, xorBlocks));
block4 = VectorXor(block4, VectorLoad(4*inc, xorBlocks));
block5 = VectorXor(block5, VectorLoad(5*inc, xorBlocks));
xorBlocks += 6*inc;
}
func6(block0, block1, block2, block3, block4, block5, subKeys, rounds);
if (xorBlocks && !(flags & BT_XorInput))
{
const int inc = static_cast<int>(xorIncrement);
block0 = VectorXor(block0, VectorLoad(0*inc, xorBlocks));
block1 = VectorXor(block1, VectorLoad(1*inc, xorBlocks));
block2 = VectorXor(block2, VectorLoad(2*inc, xorBlocks));
block3 = VectorXor(block3, VectorLoad(3*inc, xorBlocks));
block4 = VectorXor(block4, VectorLoad(4*inc, xorBlocks));
block5 = VectorXor(block5, VectorLoad(5*inc, xorBlocks));
xorBlocks += 6*inc;
}
const int inc = static_cast<int>(outIncrement);
VectorStore(block0, outBlocks+0*inc);
VectorStore(block1, outBlocks+1*inc);
VectorStore(block2, outBlocks+2*inc);
VectorStore(block3, outBlocks+3*inc);
VectorStore(block4, outBlocks+4*inc);
VectorStore(block5, outBlocks+5*inc);
outBlocks += 6*inc;
length -= 6*blockSize;
}
}
while (length >= blockSize)
{
VectorType block = VectorLoad(inBlocks);
if (flags & BT_XorInput)
block = VectorXor(block, VectorLoad(xorBlocks));
if (flags & BT_InBlockIsCounter)
const_cast<byte *>(inBlocks)[15]++;
func1(block, subKeys, rounds);
if (xorBlocks && !(flags & BT_XorInput))
block = VectorXor(block, VectorLoad(xorBlocks));
VectorStore(block, outBlocks);
inBlocks += inIncrement;
outBlocks += outIncrement;
xorBlocks += xorIncrement;
length -= blockSize;
}
return length;
}
NAMESPACE_END // CryptoPP
#endif // CRYPTOPP_ALTIVEC_AVAILABLE
#endif // CRYPTOPP_ADVANCED_SIMD_TEMPLATES

View File

@ -29,9 +29,12 @@ NAMESPACE_BEGIN(CryptoPP)
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
typedef __vector unsigned char uint8x16_p;
typedef __vector unsigned int uint32x4_p;
#if defined(CRYPTOPP_POWER5_AVAILABLE)
typedef __vector char int8x16_p;
typedef __vector unsigned char uint8x16_p;
typedef __vector unsigned short uint16x8_p;
typedef __vector unsigned int uint32x4_p;
#if defined(CRYPTOPP_POWER8_AVAILABLE)
typedef __vector unsigned long long uint64x2_p;
#endif

View File

@ -517,7 +517,7 @@ size_t Rijndael_Enc_AdvancedProcessBlocks_AESNI(const word32 *subKeys, size_t ro
MAYBE_CONST byte* ib = MAYBE_UNCONST_CAST(byte*, inBlocks);
MAYBE_CONST byte* xb = MAYBE_UNCONST_CAST(byte*, xorBlocks);
return AdvancedProcessBlocks128_SSE1x4(AESNI_Enc_Block, AESNI_Enc_4_Blocks,
return AdvancedProcessBlocks128_4x1_SSE(AESNI_Enc_Block, AESNI_Enc_4_Blocks,
sk, rounds, ib, xb, outBlocks, length, flags);
}
@ -528,7 +528,7 @@ size_t Rijndael_Dec_AdvancedProcessBlocks_AESNI(const word32 *subKeys, size_t ro
MAYBE_CONST byte* ib = MAYBE_UNCONST_CAST(byte*, inBlocks);
MAYBE_CONST byte* xb = MAYBE_UNCONST_CAST(byte*, xorBlocks);
return AdvancedProcessBlocks128_SSE1x4(AESNI_Dec_Block, AESNI_Dec_4_Blocks,
return AdvancedProcessBlocks128_4x1_SSE(AESNI_Dec_Block, AESNI_Dec_4_Blocks,
sk, rounds, ib, xb, outBlocks, length, flags);
}
@ -702,129 +702,6 @@ static inline void POWER8_Dec_6_Blocks(VectorType &block0, VectorType &block1,
block5 = VectorDecryptLast(block5, k);
}
/// \brief AdvancedProcessBlocks driver for the POWER8 AES kernels
/// \tparam F1 callable invoked as <tt>func1(block, subKeys, rounds)</tt> on one block
/// \tparam F6 callable invoked as <tt>func6(b0, b1, b2, b3, b4, b5, subKeys, rounds)</tt> on six blocks
/// \param func1 kernel that transforms a single 16-byte block in place
/// \param func6 kernel that transforms six 16-byte blocks in place
/// \param subKeys round keys; forwarded unchanged to the kernels
/// \param rounds round count; forwarded unchanged to the kernels
/// \param inBlocks input buffer. When BT_InBlockIsCounter is set the buffer is
///   written to (the counter is advanced in place) even though it is const
/// \param xorBlocks optional buffer XORed into the data, before the kernel when
///   BT_XorInput is set and after it otherwise. May be NULL, in which case no
///   XOR is performed
/// \param outBlocks output buffer
/// \param length number of bytes to process
/// \param flags bitwise OR of BlockTransformation::BT_* flags
/// \returns the number of trailing bytes left unprocessed (always less than 16)
template <typename F1, typename F6>
size_t Rijndael_AdvancedProcessBlocks_POWER8(F1 func1, F6 func6, const word32 *subKeys, size_t rounds,
    const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
    CRYPTOPP_ASSERT(subKeys);
    CRYPTOPP_ASSERT(inBlocks);
    CRYPTOPP_ASSERT(outBlocks);
    CRYPTOPP_ASSERT(length >= 16);

    const size_t blockSize = 16;
    const ptrdiff_t step = static_cast<ptrdiff_t>(blockSize);

    // Signed increments: they are negated for reverse processing, which must
    // not rely on unsigned wraparound followed by a narrowing cast.
    ptrdiff_t inIncrement = (flags & (BlockTransformation::BT_InBlockIsCounter|BlockTransformation::BT_DontIncrementInOutPointers)) ? 0 : step;
    ptrdiff_t xorIncrement = xorBlocks ? step : 0;
    ptrdiff_t outIncrement = (flags & BlockTransformation::BT_DontIncrementInOutPointers) ? 0 : step;

    // Latch the XOR mode from the caller's pointer *before* any pointer
    // arithmetic. Testing xorBlocks after it has been offset would treat a
    // NULL pointer as valid and read from a bogus address.
    const bool xorInput = xorBlocks && (flags & BlockTransformation::BT_XorInput);
    const bool xorOutput = xorBlocks && !(flags & BlockTransformation::BT_XorInput);

    if (flags & BlockTransformation::BT_ReverseDirection)
    {
        // Start at the last block and walk backwards.
        inBlocks += length - blockSize;
        if (xorBlocks)
            xorBlocks += length - blockSize;
        outBlocks += length - blockSize;
        inIncrement = 0-inIncrement;
        xorIncrement = 0-xorIncrement;
        outIncrement = 0-outIncrement;
    }

    if (flags & BlockTransformation::BT_AllowParallel)
    {
        // Counter step: a 1 in one 64-bit element, selected by endianness.
#if defined(CRYPTOPP_LITTLE_ENDIAN)
        const VectorType one = (VectorType)((uint64x2_p){1,0});
#else
        const VectorType one = (VectorType)((uint64x2_p){0,1});
#endif

        while (length >= 6*blockSize)
        {
            VectorType block0, block1, block2, block3, block4, block5, temp;
            block0 = VectorLoad(inBlocks);

            if (flags & BlockTransformation::BT_InBlockIsCounter)
            {
                // Derive the next five counters from the first one and write
                // the counter that follows them back for the next iteration.
                block1 = VectorAdd(block0, one);
                block2 = VectorAdd(block1, one);
                block3 = VectorAdd(block2, one);
                block4 = VectorAdd(block3, one);
                block5 = VectorAdd(block4, one);
                temp = VectorAdd(block5, one);
                VectorStore(temp, const_cast<byte*>(inBlocks));
            }
            else
            {
                const int inInc = static_cast<int>(inIncrement);
                block1 = VectorLoad(1*inInc, inBlocks);
                block2 = VectorLoad(2*inInc, inBlocks);
                block3 = VectorLoad(3*inInc, inBlocks);
                block4 = VectorLoad(4*inInc, inBlocks);
                block5 = VectorLoad(5*inInc, inBlocks);
                inBlocks += 6*inInc;
            }

            if (xorInput)
            {
                const int xorInc = static_cast<int>(xorIncrement);
                block0 = VectorXor(block0, VectorLoad(0*xorInc, xorBlocks));
                block1 = VectorXor(block1, VectorLoad(1*xorInc, xorBlocks));
                block2 = VectorXor(block2, VectorLoad(2*xorInc, xorBlocks));
                block3 = VectorXor(block3, VectorLoad(3*xorInc, xorBlocks));
                block4 = VectorXor(block4, VectorLoad(4*xorInc, xorBlocks));
                block5 = VectorXor(block5, VectorLoad(5*xorInc, xorBlocks));
                xorBlocks += 6*xorInc;
            }

            func6(block0, block1, block2, block3, block4, block5, subKeys, rounds);

            if (xorOutput)
            {
                const int xorInc = static_cast<int>(xorIncrement);
                block0 = VectorXor(block0, VectorLoad(0*xorInc, xorBlocks));
                block1 = VectorXor(block1, VectorLoad(1*xorInc, xorBlocks));
                block2 = VectorXor(block2, VectorLoad(2*xorInc, xorBlocks));
                block3 = VectorXor(block3, VectorLoad(3*xorInc, xorBlocks));
                block4 = VectorXor(block4, VectorLoad(4*xorInc, xorBlocks));
                block5 = VectorXor(block5, VectorLoad(5*xorInc, xorBlocks));
                xorBlocks += 6*xorInc;
            }

            const int outInc = static_cast<int>(outIncrement);
            VectorStore(block0, outBlocks+0*outInc);
            VectorStore(block1, outBlocks+1*outInc);
            VectorStore(block2, outBlocks+2*outInc);
            VectorStore(block3, outBlocks+3*outInc);
            VectorStore(block4, outBlocks+4*outInc);
            VectorStore(block5, outBlocks+5*outInc);

            outBlocks += 6*outInc;
            length -= 6*blockSize;
        }
    }

    while (length >= blockSize)
    {
        VectorType block = VectorLoad(inBlocks);

        if (xorInput)
            block = VectorXor(block, VectorLoad(xorBlocks));

        // Only the last counter byte is bumped here; no carry is propagated.
        if (flags & BlockTransformation::BT_InBlockIsCounter)
            const_cast<byte *>(inBlocks)[15]++;

        func1(block, subKeys, rounds);

        if (xorOutput)
            block = VectorXor(block, VectorLoad(xorBlocks));

        VectorStore(block, outBlocks);

        inBlocks += inIncrement;
        outBlocks += outIncrement;
        // With a NULL xorBlocks the increment is 0, so the pointer stays NULL.
        xorBlocks += xorIncrement;
        length -= blockSize;
    }

    return length;
}
ANONYMOUS_NAMESPACE_END
// We still need rcon and Se to fallback to C/C++ for AES-192 and AES-256.
@ -925,17 +802,17 @@ void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen, word32*
}
}
// Non-template entry point: forwards every argument, together with the
// POWER8_Enc_Block and POWER8_Enc_6_Blocks kernels, to the 1-block/6-block
// AdvancedProcessBlocks template and returns that template's result.
size_t Rijndael_Enc_AdvancedProcessBlocks_POWER8(const word32 *subKeys, size_t rounds,
size_t Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
return Rijndael_AdvancedProcessBlocks_POWER8(POWER8_Enc_Block, POWER8_Enc_6_Blocks,
return AdvancedProcessBlocks128_6x1_ALTIVEC(POWER8_Enc_Block, POWER8_Enc_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
// Non-template entry point: forwards every argument, together with the
// POWER8_Dec_Block and POWER8_Dec_6_Blocks kernels, to the 1-block/6-block
// AdvancedProcessBlocks template and returns that template's result.
size_t Rijndael_Dec_AdvancedProcessBlocks_POWER8(const word32 *subKeys, size_t rounds,
size_t Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
return Rijndael_AdvancedProcessBlocks_POWER8(POWER8_Dec_Block, POWER8_Dec_6_Blocks,
return AdvancedProcessBlocks128_6x1_ALTIVEC(POWER8_Dec_Block, POWER8_Dec_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}

View File

@ -304,9 +304,9 @@ extern size_t Rijndael_Dec_AdvancedProcessBlocks_ARMV8(const word32 *subkeys, si
extern void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen,
word32* rk, const word32* rc, const byte* Se);
extern size_t Rijndael_Enc_AdvancedProcessBlocks_POWER8(const word32 *subkeys, size_t rounds,
extern size_t Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subkeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
extern size_t Rijndael_Dec_AdvancedProcessBlocks_POWER8(const word32 *subkeys, size_t rounds,
extern size_t Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(const word32 *subkeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
#endif
@ -1139,7 +1139,7 @@ size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo
#endif
#if CRYPTOPP_POWER8_AES_AVAILABLE
if (HasAES())
return Rijndael_Enc_AdvancedProcessBlocks_POWER8(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
return Rijndael_Enc_AdvancedProcessBlocks128_6x1_ALTIVEC(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
#endif
#if (CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)) && !defined(CRYPTOPP_DISABLE_RIJNDAEL_ASM)
@ -1207,7 +1207,7 @@ size_t Rijndael::Dec::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo
#endif
#if CRYPTOPP_POWER8_AES_AVAILABLE
if (HasAES())
return Rijndael_Dec_AdvancedProcessBlocks_POWER8(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
return Rijndael_Dec_AdvancedProcessBlocks128_6x1_ALTIVEC(m_key, m_rounds, inBlocks, xorBlocks, outBlocks, length, flags);
#endif
return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);

View File

@ -1155,14 +1155,14 @@ NAMESPACE_BEGIN(CryptoPP)
// Forwards every argument, together with the SIMON64_Enc_Block and
// SIMON64_Enc_6_Blocks kernels, to the AdvancedProcessBlocks64 NEON template
// and returns that template's result.
size_t SIMON64_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
return AdvancedProcessBlocks64_NEON2x6(SIMON64_Enc_Block, SIMON64_Enc_6_Blocks,
return AdvancedProcessBlocks64_6x2_NEON(SIMON64_Enc_Block, SIMON64_Enc_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
// Forwards every argument, together with the SIMON64_Dec_Block and
// SIMON64_Dec_6_Blocks kernels, to the AdvancedProcessBlocks64 NEON template
// and returns that template's result.
size_t SIMON64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
return AdvancedProcessBlocks64_NEON2x6(SIMON64_Dec_Block, SIMON64_Dec_6_Blocks,
return AdvancedProcessBlocks64_6x2_NEON(SIMON64_Dec_Block, SIMON64_Dec_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
@ -1171,14 +1171,14 @@ size_t SIMON64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t roun
// Forwards every argument, together with the SIMON128_Enc_Block and
// SIMON128_Enc_6_Blocks kernels, to the AdvancedProcessBlocks128 NEON template
// and returns that template's result.
size_t SIMON128_Enc_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
return AdvancedProcessBlocks128_NEON2x6(SIMON128_Enc_Block, SIMON128_Enc_6_Blocks,
return AdvancedProcessBlocks128_6x2_NEON(SIMON128_Enc_Block, SIMON128_Enc_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
// Forwards every argument, together with the SIMON128_Dec_Block and
// SIMON128_Dec_6_Blocks kernels, to the AdvancedProcessBlocks128 NEON template
// and returns that template's result.
size_t SIMON128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
return AdvancedProcessBlocks128_NEON2x6(SIMON128_Dec_Block, SIMON128_Dec_6_Blocks,
return AdvancedProcessBlocks128_6x2_NEON(SIMON128_Dec_Block, SIMON128_Dec_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
@ -1189,14 +1189,14 @@ size_t SIMON128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rou
// Forwards every argument, together with the SIMON64_Enc_Block and
// SIMON64_Enc_6_Blocks kernels, to the AdvancedProcessBlocks64 SSE template
// and returns that template's result.
size_t SIMON64_Enc_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
return AdvancedProcessBlocks64_SSE2x6(SIMON64_Enc_Block, SIMON64_Enc_6_Blocks,
return AdvancedProcessBlocks64_6x2_SSE(SIMON64_Enc_Block, SIMON64_Enc_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
// Forwards every argument, together with the SIMON64_Dec_Block and
// SIMON64_Dec_6_Blocks kernels, to the AdvancedProcessBlocks64 SSE template
// and returns that template's result.
size_t SIMON64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
return AdvancedProcessBlocks64_SSE2x6(SIMON64_Dec_Block, SIMON64_Dec_6_Blocks,
return AdvancedProcessBlocks64_6x2_SSE(SIMON64_Dec_Block, SIMON64_Dec_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif
@ -1205,14 +1205,14 @@ size_t SIMON64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rou
// Forwards every argument, together with the SIMON128_Enc_Block and
// SIMON128_Enc_6_Blocks kernels, to the AdvancedProcessBlocks128 SSE template
// and returns that template's result.
size_t SIMON128_Enc_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
return AdvancedProcessBlocks128_SSE2x6(SIMON128_Enc_Block, SIMON128_Enc_6_Blocks,
return AdvancedProcessBlocks128_6x2_SSE(SIMON128_Enc_Block, SIMON128_Enc_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
// Forwards every argument, together with the SIMON128_Dec_Block and
// SIMON128_Dec_6_Blocks kernels, to the AdvancedProcessBlocks128 SSE template
// and returns that template's result.
size_t SIMON128_Dec_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
return AdvancedProcessBlocks128_SSE2x6(SIMON128_Dec_Block, SIMON128_Dec_6_Blocks,
return AdvancedProcessBlocks128_6x2_SSE(SIMON128_Dec_Block, SIMON128_Dec_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_SSSE3_AVAILABLE

View File

@ -1056,14 +1056,14 @@ NAMESPACE_BEGIN(CryptoPP)
// Forwards every argument, together with the SPECK64_Enc_Block and
// SPECK64_Enc_6_Blocks kernels, to the AdvancedProcessBlocks64 NEON template
// and returns that template's result.
size_t SPECK64_Enc_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
return AdvancedProcessBlocks64_NEON2x6(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
return AdvancedProcessBlocks64_6x2_NEON(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
// Forwards every argument, together with the SPECK64_Dec_Block and
// SPECK64_Dec_6_Blocks kernels, to the AdvancedProcessBlocks64 NEON template
// and returns that template's result.
size_t SPECK64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
return AdvancedProcessBlocks64_NEON2x6(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
return AdvancedProcessBlocks64_6x2_NEON(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif
@ -1072,14 +1072,14 @@ size_t SPECK64_Dec_AdvancedProcessBlocks_NEON(const word32* subKeys, size_t roun
// Forwards every argument, together with the SPECK128_Enc_Block and
// SPECK128_Enc_6_Blocks kernels, to the AdvancedProcessBlocks128 NEON template
// and returns that template's result.
size_t SPECK128_Enc_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
return AdvancedProcessBlocks128_NEON2x6(SPECK128_Enc_Block, SPECK128_Enc_6_Blocks,
return AdvancedProcessBlocks128_6x2_NEON(SPECK128_Enc_Block, SPECK128_Enc_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
// Forwards every argument, together with the SPECK128_Dec_Block and
// SPECK128_Dec_6_Blocks kernels, to the AdvancedProcessBlocks128 NEON template
// and returns that template's result.
size_t SPECK128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
return AdvancedProcessBlocks128_NEON2x6(SPECK128_Dec_Block, SPECK128_Dec_6_Blocks,
return AdvancedProcessBlocks128_6x2_NEON(SPECK128_Dec_Block, SPECK128_Dec_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
@ -1090,14 +1090,14 @@ size_t SPECK128_Dec_AdvancedProcessBlocks_NEON(const word64* subKeys, size_t rou
// Forwards every argument, together with the SPECK64_Enc_Block and
// SPECK64_Enc_6_Blocks kernels, to the AdvancedProcessBlocks64 SSE template
// and returns that template's result.
size_t SPECK64_Enc_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
return AdvancedProcessBlocks64_SSE2x6(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
return AdvancedProcessBlocks64_6x2_SSE(SPECK64_Enc_Block, SPECK64_Enc_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
// Forwards every argument, together with the SPECK64_Dec_Block and
// SPECK64_Dec_6_Blocks kernels, to the AdvancedProcessBlocks64 SSE template
// and returns that template's result.
size_t SPECK64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
return AdvancedProcessBlocks64_SSE2x6(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
return AdvancedProcessBlocks64_6x2_SSE(SPECK64_Dec_Block, SPECK64_Dec_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif
@ -1106,14 +1106,14 @@ size_t SPECK64_Dec_AdvancedProcessBlocks_SSE41(const word32* subKeys, size_t rou
// Forwards every argument, together with the SPECK128_Enc_Block and
// SPECK128_Enc_6_Blocks kernels, to the AdvancedProcessBlocks128 SSE template
// and returns that template's result.
size_t SPECK128_Enc_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
return AdvancedProcessBlocks128_SSE2x6(SPECK128_Enc_Block, SPECK128_Enc_6_Blocks,
return AdvancedProcessBlocks128_6x2_SSE(SPECK128_Enc_Block, SPECK128_Enc_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
// Forwards every argument, together with the SPECK128_Dec_Block and
// SPECK128_Dec_6_Blocks kernels, to the AdvancedProcessBlocks128 SSE template
// and returns that template's result.
size_t SPECK128_Dec_AdvancedProcessBlocks_SSSE3(const word64* subKeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags)
{
return AdvancedProcessBlocks128_SSE2x6(SPECK128_Dec_Block, SPECK128_Dec_6_Blocks,
return AdvancedProcessBlocks128_6x2_SSE(SPECK128_Dec_Block, SPECK128_Dec_6_Blocks,
subKeys, rounds, inBlocks, xorBlocks, outBlocks, length, flags);
}
#endif // CRYPTOPP_SSSE3_AVAILABLE