mirror of
https://github.com/shadps4-emu/ext-cryptopp.git
synced 2025-03-09 00:21:00 +00:00
Use EPI8 Shuffle rather than Shifts and Or for rotate when R=8
Louis Wingers and Bryan Weeks from the Simon and Speck team offered the suggestion. The change saves 0.7 cpb for Speck, and 5 cpb for Simon on x86_64. Speck is now running very close to the Team's times for SSE4. Simon is still off, but we know the root cause. For Simon, the Team used a fast bit-sliced implementation.
This commit is contained in:
parent
bdb2db7ac2
commit
6e829cebee
@ -471,6 +471,24 @@ inline __m128i RotateRight64(const __m128i& val)
|
||||
return _mm_or_si128(a, b);
|
||||
}
|
||||
|
||||
// Faster than two Shifts and an Or
|
||||
template <>
|
||||
inline __m128i RotateLeft64<8>(const __m128i& val)
|
||||
{
|
||||
CRYPTOPP_ASSERT(R < 64);
|
||||
const __m128i mask = _mm_set_epi8(14,13,12,11, 10,9,8,15, 6,5,4,3, 2,1,0,7);
|
||||
return _mm_shuffle_epi8(val, mask);
|
||||
}
|
||||
|
||||
// Faster than two Shifts and an Or
|
||||
template <>
|
||||
inline __m128i RotateRight64<8>(const __m128i& val)
|
||||
{
|
||||
CRYPTOPP_ASSERT(R < 64);
|
||||
const __m128i mask = _mm_set_epi8(8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1);
|
||||
return _mm_shuffle_epi8(val, mask);
|
||||
}
|
||||
|
||||
inline __m128i SIMON128_f(const __m128i& v)
|
||||
{
|
||||
return _mm_xor_si128(RotateLeft64<2>(v),
|
||||
|
@ -418,6 +418,24 @@ inline __m128i RotateRight64(const __m128i& val)
|
||||
return _mm_or_si128(a, b);
|
||||
}
|
||||
|
||||
// Faster than two Shifts and an Or
|
||||
template <>
|
||||
inline __m128i RotateLeft64<8>(const __m128i& val)
|
||||
{
|
||||
CRYPTOPP_ASSERT(R < 64);
|
||||
const __m128i mask = _mm_set_epi8(14,13,12,11, 10,9,8,15, 6,5,4,3, 2,1,0,7);
|
||||
return _mm_shuffle_epi8(val, mask);
|
||||
}
|
||||
|
||||
// Faster than two Shifts and an Or
|
||||
template <>
|
||||
inline __m128i RotateRight64<8>(const __m128i& val)
|
||||
{
|
||||
CRYPTOPP_ASSERT(R < 64);
|
||||
const __m128i mask = _mm_set_epi8(8,15,14,13, 12,11,10,9, 0,7,6,5, 4,3,2,1);
|
||||
return _mm_shuffle_epi8(val, mask);
|
||||
}
|
||||
|
||||
inline void SPECK128_Enc_Block(__m128i &block0, const word64 *subkeys, unsigned int rounds)
|
||||
{
|
||||
// Hack ahead... Rearrange the data for vectorization. It is easier to permute
|
||||
|
Loading…
x
Reference in New Issue
Block a user