mirror of
https://github.com/shadps4-emu/ext-cryptopp.git
synced 2024-11-26 19:30:21 +00:00
Switch to rotlConstant and rotrConstant
Update comments
This commit is contained in:
parent
2abf7d7bc4
commit
4f2d6f713f
@ -48,8 +48,6 @@ ANONYMOUS_NAMESPACE_BEGIN
|
|||||||
using CryptoPP::byte;
|
using CryptoPP::byte;
|
||||||
using CryptoPP::word32;
|
using CryptoPP::word32;
|
||||||
using CryptoPP::word64;
|
using CryptoPP::word64;
|
||||||
using CryptoPP::rotlFixed;
|
|
||||||
using CryptoPP::rotrFixed;
|
|
||||||
using CryptoPP::BlockTransformation;
|
using CryptoPP::BlockTransformation;
|
||||||
|
|
||||||
// *************************** ARM NEON ************************** //
|
// *************************** ARM NEON ************************** //
|
||||||
@ -104,11 +102,9 @@ inline uint64x2_t Shuffle64(const uint64x2_t& val)
|
|||||||
|
|
||||||
inline void SPECK128_Enc_Block(uint8x16_t &block0, const word64 *subkeys, unsigned int rounds)
|
inline void SPECK128_Enc_Block(uint8x16_t &block0, const word64 *subkeys, unsigned int rounds)
|
||||||
{
|
{
|
||||||
// Hack ahead... SPECK128_AdvancedProcessBlocks_NEON loads each SPECK-128 block into a
|
// Hack ahead... Rearrange the data for vectorization. It is easier to permute
|
||||||
// uint64x2_t. We can't SSE over them, so we rearrange the data to allow packed operations.
|
// the data in SPECK128_Enc_Blocks than SPECK128_AdvancedProcessBlocks_NEON.
|
||||||
// Its also easier to permute them in SPECK128_Enc_Block rather than the calling code.
|
// The zero block below is a "don't care". It is present so we can vectorize.
|
||||||
// SPECK128_AdvancedProcessBlocks_NEON is rather messy. The zero block below is a
|
|
||||||
// "don't care". It is present so we can vectorize SPECK128_Enc_Block.
|
|
||||||
uint8x16_t block1 = {0};
|
uint8x16_t block1 = {0};
|
||||||
uint64x2_t x1 = UnpackLow64<uint64x2_t>(block0, block1);
|
uint64x2_t x1 = UnpackLow64<uint64x2_t>(block0, block1);
|
||||||
uint64x2_t y1 = UnpackHigh64<uint64x2_t>(block0, block1);
|
uint64x2_t y1 = UnpackHigh64<uint64x2_t>(block0, block1);
|
||||||
@ -138,10 +134,8 @@ inline void SPECK128_Enc_6_Blocks(uint8x16_t &block0, uint8x16_t &block1,
|
|||||||
uint8x16_t &block2, uint8x16_t &block3, uint8x16_t &block4,
|
uint8x16_t &block2, uint8x16_t &block3, uint8x16_t &block4,
|
||||||
uint8x16_t &block5, const word64 *subkeys, unsigned int rounds)
|
uint8x16_t &block5, const word64 *subkeys, unsigned int rounds)
|
||||||
{
|
{
|
||||||
// Hack ahead... SPECK128_AdvancedProcessBlocks_NEON loads each SPECK-128 block into a
|
// Hack ahead... Rearrange the data for vectorization. It is easier to permute
|
||||||
// uint64x2_t. We can't SSE over them, so we rearrange the data to allow packed operations.
|
// the data in SPECK128_Enc_Blocks than SPECK128_AdvancedProcessBlocks_NEON.
|
||||||
// Its also easier to permute them in SPECK128_Enc_6_Blocks rather than the calling code.
|
|
||||||
// SPECK128_AdvancedProcessBlocks_NEON is rather messy.
|
|
||||||
uint64x2_t x1 = UnpackLow64<uint64x2_t>(block0, block1);
|
uint64x2_t x1 = UnpackLow64<uint64x2_t>(block0, block1);
|
||||||
uint64x2_t y1 = UnpackHigh64<uint64x2_t>(block0, block1);
|
uint64x2_t y1 = UnpackHigh64<uint64x2_t>(block0, block1);
|
||||||
uint64x2_t x2 = UnpackLow64<uint64x2_t>(block2, block3);
|
uint64x2_t x2 = UnpackLow64<uint64x2_t>(block2, block3);
|
||||||
@ -194,11 +188,9 @@ inline void SPECK128_Enc_6_Blocks(uint8x16_t &block0, uint8x16_t &block1,
|
|||||||
|
|
||||||
inline void SPECK128_Dec_Block(uint8x16_t &block0, const word64 *subkeys, unsigned int rounds)
|
inline void SPECK128_Dec_Block(uint8x16_t &block0, const word64 *subkeys, unsigned int rounds)
|
||||||
{
|
{
|
||||||
// Hack ahead... SPECK128_AdvancedProcessBlocks_NEON loads each SPECK-128 block into a
|
// Hack ahead... Rearrange the data for vectorization. It is easier to permute
|
||||||
// uint64x2_t. We can't SSE over them, so we rearrange the data to allow packed operations.
|
// the data in SPECK128_Dec_Blocks than SPECK128_AdvancedProcessBlocks_NEON.
|
||||||
// Its also easier to permute them in SPECK128_Dec_Block rather than the calling code.
|
// The zero block below is a "don't care". It is present so we can vectorize.
|
||||||
// SPECK128_AdvancedProcessBlocks_NEON is rather messy. The zero block below is a
|
|
||||||
// "don't care". It is present so we can vectorize SPECK128_Dec_Block.
|
|
||||||
uint8x16_t block1 = {0};
|
uint8x16_t block1 = {0};
|
||||||
uint64x2_t x1 = UnpackLow64<uint64x2_t>(block0, block1);
|
uint64x2_t x1 = UnpackLow64<uint64x2_t>(block0, block1);
|
||||||
uint64x2_t y1 = UnpackHigh64<uint64x2_t>(block0, block1);
|
uint64x2_t y1 = UnpackHigh64<uint64x2_t>(block0, block1);
|
||||||
@ -228,10 +220,8 @@ inline void SPECK128_Dec_6_Blocks(uint8x16_t &block0, uint8x16_t &block1,
|
|||||||
uint8x16_t &block2, uint8x16_t &block3, uint8x16_t &block4,
|
uint8x16_t &block2, uint8x16_t &block3, uint8x16_t &block4,
|
||||||
uint8x16_t &block5, const word64 *subkeys, unsigned int rounds)
|
uint8x16_t &block5, const word64 *subkeys, unsigned int rounds)
|
||||||
{
|
{
|
||||||
// Hack ahead... SPECK128_AdvancedProcessBlocks_NEON loads each SPECK-128 block into a
|
// Hack ahead... Rearrange the data for vectorization. It is easier to permute
|
||||||
// uint64x2_t. We can't SSE over them, so we rearrange the data to allow packed operations.
|
// the data in SPECK128_Dec_Blocks than SPECK128_AdvancedProcessBlocks_NEON.
|
||||||
// Its also easier to permute them in SPECK128_Dec_6_Blocks rather than the calling code.
|
|
||||||
// SPECK128_AdvancedProcessBlocks_NEON is rather messy.
|
|
||||||
uint64x2_t x1 = UnpackLow64<uint64x2_t>(block0, block1);
|
uint64x2_t x1 = UnpackLow64<uint64x2_t>(block0, block1);
|
||||||
uint64x2_t y1 = UnpackHigh64<uint64x2_t>(block0, block1);
|
uint64x2_t y1 = UnpackHigh64<uint64x2_t>(block0, block1);
|
||||||
uint64x2_t x2 = UnpackLow64<uint64x2_t>(block2, block3);
|
uint64x2_t x2 = UnpackLow64<uint64x2_t>(block2, block3);
|
||||||
@ -430,11 +420,9 @@ inline __m128i RotateRight64(const __m128i& val)
|
|||||||
|
|
||||||
inline void SPECK128_Enc_Block(__m128i &block0, const word64 *subkeys, unsigned int rounds)
|
inline void SPECK128_Enc_Block(__m128i &block0, const word64 *subkeys, unsigned int rounds)
|
||||||
{
|
{
|
||||||
// Hack ahead... SPECK128_AdvancedProcessBlocks_SSSE3 loads each SPECK-128 block into a
|
// Hack ahead... Rearrange the data for vectorization. It is easier to permute
|
||||||
// __m128i. We can't SSE over them, so we rearrange the data to allow packed operations.
|
// the data in SPECK128_Enc_Blocks than SPECK128_AdvancedProcessBlocks_SSSE3.
|
||||||
// Its also easier to permute them in SPECK128_Enc_Block rather than the calling code.
|
// The zero block below is a "don't care". It is present so we can vectorize.
|
||||||
// SPECK128_AdvancedProcessBlocks_SSSE3 is rather messy. The zero block below is a
|
|
||||||
// "don't care". It is present so we can vectorize SPECK128_Enc_Block.
|
|
||||||
__m128i block1 = _mm_setzero_si128();
|
__m128i block1 = _mm_setzero_si128();
|
||||||
__m128i x1 = _mm_unpacklo_epi64(block0, block1);
|
__m128i x1 = _mm_unpacklo_epi64(block0, block1);
|
||||||
__m128i y1 = _mm_unpackhi_epi64(block0, block1);
|
__m128i y1 = _mm_unpackhi_epi64(block0, block1);
|
||||||
@ -465,10 +453,8 @@ inline void SPECK128_Enc_Block(__m128i &block0, const word64 *subkeys, unsigned
|
|||||||
inline void SPECK128_Enc_4_Blocks(__m128i &block0, __m128i &block1,
|
inline void SPECK128_Enc_4_Blocks(__m128i &block0, __m128i &block1,
|
||||||
__m128i &block2, __m128i &block3, const word64 *subkeys, unsigned int rounds)
|
__m128i &block2, __m128i &block3, const word64 *subkeys, unsigned int rounds)
|
||||||
{
|
{
|
||||||
// Hack ahead... SPECK128_AdvancedProcessBlocks_SSSE3 loads each SPECK-128 block into a
|
// Hack ahead... Rearrange the data for vectorization. It is easier to permute
|
||||||
// __m128i. We can't SSE over them, so we rearrange the data to allow packed operations.
|
// the data in SPECK128_Enc_Blocks than SPECK128_AdvancedProcessBlocks_SSSE3.
|
||||||
// Its also easier to permute them in SPECK128_Enc_4_Blocks rather than the calling code.
|
|
||||||
// SPECK128_AdvancedProcessBlocks_SSSE3 is rather messy.
|
|
||||||
__m128i x1 = _mm_unpacklo_epi64(block0, block1);
|
__m128i x1 = _mm_unpacklo_epi64(block0, block1);
|
||||||
__m128i y1 = _mm_unpackhi_epi64(block0, block1);
|
__m128i y1 = _mm_unpackhi_epi64(block0, block1);
|
||||||
__m128i x2 = _mm_unpacklo_epi64(block2, block3);
|
__m128i x2 = _mm_unpacklo_epi64(block2, block3);
|
||||||
@ -510,11 +496,9 @@ inline void SPECK128_Enc_4_Blocks(__m128i &block0, __m128i &block1,
|
|||||||
|
|
||||||
inline void SPECK128_Dec_Block(__m128i &block0, const word64 *subkeys, unsigned int rounds)
|
inline void SPECK128_Dec_Block(__m128i &block0, const word64 *subkeys, unsigned int rounds)
|
||||||
{
|
{
|
||||||
// Hack ahead... SPECK128_AdvancedProcessBlocks_SSSE3 loads each SPECK-128 block into a
|
// Hack ahead... Rearrange the data for vectorization. It is easier to permute
|
||||||
// __m128i. We can't SSE over them, so we rearrange the data to allow packed operations.
|
// the data in SPECK128_Dec_Blocks than SPECK128_AdvancedProcessBlocks_SSSE3.
|
||||||
// Its also easier to permute them in SPECK128_Dec_Block rather than the calling code.
|
// The zero block below is a "don't care". It is present so we can vectorize.
|
||||||
// SPECK128_AdvancedProcessBlocks_SSSE3 is rather messy. The zero block below is a
|
|
||||||
// "don't care". It is present so we can vectorize SPECK128_Dec_Block.
|
|
||||||
__m128i block1 = _mm_setzero_si128();
|
__m128i block1 = _mm_setzero_si128();
|
||||||
__m128i x1 = _mm_unpacklo_epi64(block0, block1);
|
__m128i x1 = _mm_unpacklo_epi64(block0, block1);
|
||||||
__m128i y1 = _mm_unpackhi_epi64(block0, block1);
|
__m128i y1 = _mm_unpackhi_epi64(block0, block1);
|
||||||
@ -545,10 +529,8 @@ inline void SPECK128_Dec_Block(__m128i &block0, const word64 *subkeys, unsigned
|
|||||||
inline void SPECK128_Dec_4_Blocks(__m128i &block0, __m128i &block1,
|
inline void SPECK128_Dec_4_Blocks(__m128i &block0, __m128i &block1,
|
||||||
__m128i &block2, __m128i &block3, const word64 *subkeys, unsigned int rounds)
|
__m128i &block2, __m128i &block3, const word64 *subkeys, unsigned int rounds)
|
||||||
{
|
{
|
||||||
// Hack ahead... SPECK128_AdvancedProcessBlocks_SSSE3 loads each SPECK-128 block into a
|
// Hack ahead... Rearrange the data for vectorization. It is easier to permute
|
||||||
// __m128i. We can't SSE over them, so we rearrange the data to allow packed operations.
|
// the data in SPECK128_Dec_Blocks than SPECK128_AdvancedProcessBlocks_SSSE3.
|
||||||
// Its also easier to permute them in SPECK128_Dec_4_Blocks rather than the calling code.
|
|
||||||
// SPECK128_AdvancedProcessBlocks_SSSE3 is rather messy.
|
|
||||||
__m128i x1 = _mm_unpacklo_epi64(block0, block1);
|
__m128i x1 = _mm_unpacklo_epi64(block0, block1);
|
||||||
__m128i y1 = _mm_unpackhi_epi64(block0, block1);
|
__m128i y1 = _mm_unpackhi_epi64(block0, block1);
|
||||||
__m128i x2 = _mm_unpacklo_epi64(block2, block3);
|
__m128i x2 = _mm_unpacklo_epi64(block2, block3);
|
||||||
|
12
speck.cpp
12
speck.cpp
@ -22,8 +22,8 @@ ANONYMOUS_NAMESPACE_BEGIN
|
|||||||
|
|
||||||
using CryptoPP::word32;
|
using CryptoPP::word32;
|
||||||
using CryptoPP::word64;
|
using CryptoPP::word64;
|
||||||
using CryptoPP::rotlFixed;
|
using CryptoPP::rotlConstant;
|
||||||
using CryptoPP::rotrFixed;
|
using CryptoPP::rotrConstant;
|
||||||
|
|
||||||
//! \brief Forward round transformation
|
//! \brief Forward round transformation
|
||||||
//! \tparam W word type
|
//! \tparam W word type
|
||||||
@ -35,9 +35,9 @@ using CryptoPP::rotrFixed;
|
|||||||
template <class W>
|
template <class W>
|
||||||
inline void TF83(W& x, W& y, const W k)
|
inline void TF83(W& x, W& y, const W k)
|
||||||
{
|
{
|
||||||
x = rotrFixed(x, 8);
|
x = rotrConstant<8>(x);
|
||||||
x += y; x ^= k;
|
x += y; x ^= k;
|
||||||
y = rotlFixed(y, 3);
|
y = rotlConstant<3>(y);
|
||||||
y ^= x;
|
y ^= x;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -52,9 +52,9 @@ template <class W>
|
|||||||
inline void TR83(W& x, W& y, const W k)
|
inline void TR83(W& x, W& y, const W k)
|
||||||
{
|
{
|
||||||
y ^= x;
|
y ^= x;
|
||||||
y = rotrFixed(y,3);
|
y = rotrConstant<3>(y);
|
||||||
x ^= k; x -= y;
|
x ^= k; x -= y;
|
||||||
x = rotlFixed(x,8);
|
x = rotlConstant<8>(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
//! \brief Forward transformation
|
//! \brief Forward transformation
|
||||||
|
Loading…
Reference in New Issue
Block a user