Fix global optimization bug for ChaCha AVX2 under VS2017 (GH #735)

Also see https://github.com/weidai11/cryptopp/issues/649. The 649 issue is the one affecting AES. It appears to be the same problem.
This commit is contained in:
Jeffrey Walton 2018-11-09 08:00:53 -05:00
parent af9fb9d21e
commit 092309b266
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
2 changed files with 21 additions and 6 deletions

View File

@ -36,10 +36,21 @@ extern const char CHACHA_AVX_FNAME[] = __FILE__;
# define MAYBE_CONST const
#endif
#if (CRYPTOPP_AVX2_AVAILABLE)
// Work around a VS2017 global optimization bug. TODO: figure out
// when full optimizations can be re-enabled for VS2017. Also see
// https://github.com/weidai11/cryptopp/issues/649 and
// https://github.com/weidai11/cryptopp/issues/735. Issue 649
// was reported against AES and appears to be the same problem;
// issue 735 is where it surfaced again at the ChaCha AVX2 cut-in.
#if (_MSC_VER >= 1910) && defined(NDEBUG)
# pragma optimize("", off)
# pragma optimize("ts", on)
#endif
ANONYMOUS_NAMESPACE_BEGIN
#if (CRYPTOPP_AVX2_AVAILABLE)
template <unsigned int R>
inline __m256i RotateLeft(const __m256i val)
{
@ -62,10 +73,14 @@ inline __m256i RotateLeft<16>(const __m256i val)
return _mm256_shuffle_epi8(val, mask);
}
#endif // CRYPTOPP_AVX2_AVAILABLE
ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP)
#if (CRYPTOPP_AVX2_AVAILABLE)
void ChaCha_OperateKeystream_AVX2(const word32 *state, const byte* input, byte *output, unsigned int rounds)
{
MAYBE_CONST __m128i* state_mm = (MAYBE_CONST __m128i*)(state);
@ -358,6 +373,6 @@ void ChaCha_OperateKeystream_AVX2(const word32 *state, const byte* input, byte *
}
}
NAMESPACE_END
#endif // CRYPTOPP_AVX2_AVAILABLE
NAMESPACE_END

View File

@ -88,10 +88,10 @@ being unloaded from L1 cache, until that round is finished.
#include "misc.h"
#include "cpu.h"
// MSVC bug, still don't know how to fix it. TODO, figure out
// when we can re-enable optimizations for MSVC. Also see
// VS2017 and global optimization bug. TODO, figure out when
// we can re-enable full optimizations for VS2017. Also see
// https://github.com/weidai11/cryptopp/issues/649
#if defined(_MSC_VER) && (_MSC_VER >= 1910)
#if (_MSC_VER >= 1910) && defined(NDEBUG)
# pragma optimize("", off)
# pragma optimize("ts", on)
#endif