Use EPI8 Shuffle rather than Shifts and Or for rotate when R=8

Louis Wingers and Bryan Weeks from the Simon and Speck team offered the suggestion. The change saves 0.7 cpb for Speck, and 5 cpb for Simon on x86_64.
Speck is now running very close to the Team's time for SSE4. Simon is still off, but we know the root cause. For Simon, the Team used a fast bit-sliced implementation.
This commit is contained in:
Jeffrey Walton 2017-11-29 08:53:48 -05:00
parent bdb2db7ac2
commit 6e829cebee
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
2 changed files with 36 additions and 0 deletions

View File

@ -471,6 +471,24 @@ inline __m128i RotateRight64(const __m128i& val)
return _mm_or_si128(a, b);
}
// Rotate each 64-bit lane of val left by 8 bits.
// A rotate by a whole byte is a byte permutation, so one _mm_shuffle_epi8
// replaces the two shifts and the OR used by the generic template.
template <>
inline __m128i RotateLeft64<8>(const __m128i& val)
{
    // No range assert: the rotate amount is fixed at 8 by this explicit
    // specialization, and 'template <>' declares no parameter R to test.

    // _mm_set_epi8 takes its arguments from byte 15 down to byte 0. Within
    // each 8-byte lane, result byte i is source byte (i + 7) % 8.
    const __m128i mask = _mm_set_epi8(14,13,12,11, 10,9,8,15, 6,5,4,3, 2,1,0,7);
    return _mm_shuffle_epi8(val, mask);
}
// Rotate each 64-bit lane of val right by 8 bits.
// A rotate by a whole byte is a byte permutation, so one _mm_shuffle_epi8
// replaces the two shifts and the OR used by the generic template.
template <>
inline __m128i RotateRight64<8>(const __m128i& val)
{
    // No range assert: the rotate amount is fixed at 8 by this explicit
    // specialization, and 'template <>' declares no parameter R to test.

    // _mm_set_epi8 takes its arguments from byte 15 down to byte 0. Within
    // each 8-byte lane, result byte i is source byte (i + 1) % 8.
    const __m128i mask = _mm_set_epi8(8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1);
    return _mm_shuffle_epi8(val, mask);
}
inline __m128i SIMON128_f(const __m128i& v)
{
return _mm_xor_si128(RotateLeft64<2>(v),

View File

@ -418,6 +418,24 @@ inline __m128i RotateRight64(const __m128i& val)
return _mm_or_si128(a, b);
}
// Rotate each 64-bit lane of val left by 8 bits.
// A rotate by a whole byte is a byte permutation, so one _mm_shuffle_epi8
// replaces the two shifts and the OR used by the generic template.
template <>
inline __m128i RotateLeft64<8>(const __m128i& val)
{
    // No range assert: the rotate amount is fixed at 8 by this explicit
    // specialization, and 'template <>' declares no parameter R to test.

    // _mm_set_epi8 takes its arguments from byte 15 down to byte 0. Within
    // each 8-byte lane, result byte i is source byte (i + 7) % 8.
    const __m128i mask = _mm_set_epi8(14,13,12,11, 10,9,8,15, 6,5,4,3, 2,1,0,7);
    return _mm_shuffle_epi8(val, mask);
}
// Rotate each 64-bit lane of val right by 8 bits.
// A rotate by a whole byte is a byte permutation, so one _mm_shuffle_epi8
// replaces the two shifts and the OR used by the generic template.
template <>
inline __m128i RotateRight64<8>(const __m128i& val)
{
    // No range assert: the rotate amount is fixed at 8 by this explicit
    // specialization, and 'template <>' declares no parameter R to test.

    // _mm_set_epi8 takes its arguments from byte 15 down to byte 0. Within
    // each 8-byte lane, result byte i is source byte (i + 1) % 8.
    const __m128i mask = _mm_set_epi8(8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1);
    return _mm_shuffle_epi8(val, mask);
}
inline void SPECK128_Enc_Block(__m128i &block0, const word64 *subkeys, unsigned int rounds)
{
// Hack ahead... Rearrange the data for vectorization. It is easier to permute