Try fix ProcessData in CFB_CipherTemplate and AdditiveCipherTemplate

This commit attempts to restore performance while taming the optimizer.

Also see GH #683, GH #1010, GH #1088, GH #1103.
This commit is contained in:
Jeffrey Walton 2022-02-13 21:18:43 -05:00 committed by GitHub
parent 711e79f102
commit b54a41d9f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 16 additions and 61 deletions

View File

@ -35,7 +35,7 @@
# include <immintrin.h> # include <immintrin.h>
# endif # endif
# if defined(__aarch64__) || defined(__aarch32__) || defined(_M_ARM64) # if defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64)
# if (CRYPTOPP_ARM_NEON_HEADER) || (CRYPTOPP_ARM_ASIMD_AVAILABLE) # if (CRYPTOPP_ARM_NEON_HEADER) || (CRYPTOPP_ARM_ASIMD_AVAILABLE)
# include <arm_neon.h> # include <arm_neon.h>
# endif # endif

View File

@ -55,7 +55,7 @@ void AdditiveCipherTemplate<S>::GenerateBlock(byte *outString, size_t length)
} }
PolicyInterface &policy = this->AccessPolicy(); PolicyInterface &policy = this->AccessPolicy();
unsigned int bytesPerIteration = policy.GetBytesPerIteration(); size_t bytesPerIteration = policy.GetBytesPerIteration();
if (length >= bytesPerIteration) if (length >= bytesPerIteration)
{ {
@ -93,26 +93,8 @@ void AdditiveCipherTemplate<S>::ProcessData(byte *outString, const byte *inStrin
CRYPTOPP_ASSERT(outString); CRYPTOPP_ASSERT(inString); CRYPTOPP_ASSERT(outString); CRYPTOPP_ASSERT(inString);
CRYPTOPP_ASSERT(length % this->MandatoryBlockSize() == 0); CRYPTOPP_ASSERT(length % this->MandatoryBlockSize() == 0);
// If this assert fires, then outString == inString. You could experience a
// performance hit. Also see https://github.com/weidai11/cryptopp/issues/1088'
CRYPTOPP_ASSERT(outString != inString);
PolicyInterface &policy = this->AccessPolicy(); PolicyInterface &policy = this->AccessPolicy();
unsigned int bytesPerIteration = policy.GetBytesPerIteration(); size_t bytesPerIteration = policy.GetBytesPerIteration();
byte* savedOutString = outString;
size_t savedLength = length;
bool copyOut = false;
if (inString == outString)
{
// No need to copy inString to outString.
// Just allocate the space.
m_tempOutString.New(length);
m_tempOutString.SetMark(0);
outString = m_tempOutString.BytePtr();
copyOut = true;
}
if (m_leftOver > 0) if (m_leftOver > 0)
{ {
@ -124,13 +106,9 @@ void AdditiveCipherTemplate<S>::ProcessData(byte *outString, const byte *inStrin
length -= len; m_leftOver -= len; length -= len; m_leftOver -= len;
} }
if (!length) { if (!length) { return; }
if (copyOut)
std::memcpy(savedOutString, m_tempOutString.BytePtr(), savedLength);
return;
}
const unsigned int alignment = policy.GetAlignment(); const word32 alignment = policy.GetAlignment();
const bool inAligned = IsAlignedOn(inString, alignment); const bool inAligned = IsAlignedOn(inString, alignment);
const bool outAligned = IsAlignedOn(outString, alignment); const bool outAligned = IsAlignedOn(outString, alignment);
CRYPTOPP_UNUSED(inAligned); CRYPTOPP_UNUSED(outAligned); CRYPTOPP_UNUSED(inAligned); CRYPTOPP_UNUSED(outAligned);
@ -143,6 +121,10 @@ void AdditiveCipherTemplate<S>::ProcessData(byte *outString, const byte *inStrin
KeystreamOperation operation = KeystreamOperation(flags); KeystreamOperation operation = KeystreamOperation(flags);
policy.OperateKeystream(operation, outString, inString, iterations); policy.OperateKeystream(operation, outString, inString, iterations);
// Try to tame the optimizer. This is GH #683, #1010, and #1088.
volatile byte* unused = const_cast<volatile byte*>(outString);
CRYPTOPP_UNUSED(unused);
inString = PtrAdd(inString, iterations * bytesPerIteration); inString = PtrAdd(inString, iterations * bytesPerIteration);
outString = PtrAdd(outString, iterations * bytesPerIteration); outString = PtrAdd(outString, iterations * bytesPerIteration);
length -= iterations * bytesPerIteration; length -= iterations * bytesPerIteration;
@ -171,9 +153,6 @@ void AdditiveCipherTemplate<S>::ProcessData(byte *outString, const byte *inStrin
m_leftOver = bufferByteSize - length; m_leftOver = bufferByteSize - length;
} }
if (copyOut)
std::memcpy(savedOutString, m_tempOutString.BytePtr(), savedLength);
} }
template <class S> template <class S>
@ -244,28 +223,10 @@ void CFB_CipherTemplate<BASE>::ProcessData(byte *outString, const byte *inString
CRYPTOPP_ASSERT(outString); CRYPTOPP_ASSERT(inString); CRYPTOPP_ASSERT(outString); CRYPTOPP_ASSERT(inString);
CRYPTOPP_ASSERT(length % this->MandatoryBlockSize() == 0); CRYPTOPP_ASSERT(length % this->MandatoryBlockSize() == 0);
// If this assert fires, then outString == inString. You could experience a
// performance hit. Also see https://github.com/weidai11/cryptopp/issues/1088'
CRYPTOPP_ASSERT(outString != inString);
PolicyInterface &policy = this->AccessPolicy(); PolicyInterface &policy = this->AccessPolicy();
unsigned int bytesPerIteration = policy.GetBytesPerIteration(); unsigned int bytesPerIteration = policy.GetBytesPerIteration();
byte *reg = policy.GetRegisterBegin(); byte *reg = policy.GetRegisterBegin();
byte* savedOutString = outString;
size_t savedLength = length;
bool copyOut = false;
if (inString == outString)
{
// No need to copy inString to outString.
// Just allocate the space.
m_tempOutString.New(length);
m_tempOutString.SetMark(0);
outString = m_tempOutString.BytePtr();
copyOut = true;
}
if (m_leftOver) if (m_leftOver)
{ {
const size_t len = STDMIN(m_leftOver, length); const size_t len = STDMIN(m_leftOver, length);
@ -276,13 +237,9 @@ void CFB_CipherTemplate<BASE>::ProcessData(byte *outString, const byte *inString
m_leftOver -= len; length -= len; m_leftOver -= len; length -= len;
} }
if (!length) { if (!length) { return; }
if (copyOut)
std::memcpy(savedOutString, m_tempOutString.BytePtr(), savedLength);
return;
}
const unsigned int alignment = policy.GetAlignment(); const word32 alignment = policy.GetAlignment();
const bool inAligned = IsAlignedOn(inString, alignment); const bool inAligned = IsAlignedOn(inString, alignment);
const bool outAligned = IsAlignedOn(outString, alignment); const bool outAligned = IsAlignedOn(outString, alignment);
CRYPTOPP_UNUSED(inAligned); CRYPTOPP_UNUSED(outAligned); CRYPTOPP_UNUSED(inAligned); CRYPTOPP_UNUSED(outAligned);
@ -292,6 +249,10 @@ void CFB_CipherTemplate<BASE>::ProcessData(byte *outString, const byte *inString
CipherDir cipherDir = GetCipherDir(*this); CipherDir cipherDir = GetCipherDir(*this);
policy.Iterate(outString, inString, cipherDir, length / bytesPerIteration); policy.Iterate(outString, inString, cipherDir, length / bytesPerIteration);
// Try to tame the optimizer. This is GH #683, #1010, and #1088.
volatile byte* unused = const_cast<volatile byte*>(outString);
CRYPTOPP_UNUSED(unused);
const size_t remainder = length % bytesPerIteration; const size_t remainder = length % bytesPerIteration;
inString = PtrAdd(inString, length - remainder); inString = PtrAdd(inString, length - remainder);
outString = PtrAdd(outString, length - remainder); outString = PtrAdd(outString, length - remainder);
@ -314,9 +275,6 @@ void CFB_CipherTemplate<BASE>::ProcessData(byte *outString, const byte *inString
CombineMessageAndShiftRegister(outString, reg, inString, length); CombineMessageAndShiftRegister(outString, reg, inString, length);
m_leftOver = bytesPerIteration - length; m_leftOver = bytesPerIteration - length;
} }
if (copyOut)
std::memcpy(savedOutString, m_tempOutString.BytePtr(), savedLength);
} }
template <class BASE> template <class BASE>

View File

@ -394,8 +394,7 @@ protected:
inline byte * KeystreamBufferBegin() {return this->m_buffer.data();} inline byte * KeystreamBufferBegin() {return this->m_buffer.data();}
inline byte * KeystreamBufferEnd() {return (PtrAdd(this->m_buffer.data(), this->m_buffer.size()));} inline byte * KeystreamBufferEnd() {return (PtrAdd(this->m_buffer.data(), this->m_buffer.size()));}
// m_tempOutString added due to GH #1010 AlignedSecByteBlock m_buffer;
AlignedSecByteBlock m_buffer, m_tempOutString;
size_t m_leftOver; size_t m_leftOver;
}; };
@ -646,8 +645,6 @@ protected:
void UncheckedSetKey(const byte *key, unsigned int length, const NameValuePairs &params); void UncheckedSetKey(const byte *key, unsigned int length, const NameValuePairs &params);
// m_tempOutString added due to GH #1010
AlignedSecByteBlock m_tempOutString;
size_t m_leftOver; size_t m_leftOver;
}; };