From b54a41d9f21112f7230f972295f6bc7232504e1c Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Sun, 13 Feb 2022 21:18:43 -0500 Subject: [PATCH] Try fix ProcessData in CFB_CipherTemplate and AdditiveCipherTemplate This commit attempts to restore performance while taming the optimizer. Also see GH #683, GH #1010, GH #1088, GH #1103. --- misc.cpp | 2 +- strciphr.cpp | 70 +++++++++++----------------------------------------- strciphr.h | 5 +--- 3 files changed, 16 insertions(+), 61 deletions(-) diff --git a/misc.cpp b/misc.cpp index 83c0d1cd..cdd4a306 100644 --- a/misc.cpp +++ b/misc.cpp @@ -35,7 +35,7 @@ # include # endif -# if defined(__aarch64__) || defined(__aarch32__) || defined(_M_ARM64) +# if defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64) # if (CRYPTOPP_ARM_NEON_HEADER) || (CRYPTOPP_ARM_ASIMD_AVAILABLE) # include # endif diff --git a/strciphr.cpp b/strciphr.cpp index 1569f19b..db4a04ca 100644 --- a/strciphr.cpp +++ b/strciphr.cpp @@ -55,7 +55,7 @@ void AdditiveCipherTemplate::GenerateBlock(byte *outString, size_t length) } PolicyInterface &policy = this->AccessPolicy(); - unsigned int bytesPerIteration = policy.GetBytesPerIteration(); + size_t bytesPerIteration = policy.GetBytesPerIteration(); if (length >= bytesPerIteration) { @@ -93,26 +93,8 @@ void AdditiveCipherTemplate::ProcessData(byte *outString, const byte *inStrin CRYPTOPP_ASSERT(outString); CRYPTOPP_ASSERT(inString); CRYPTOPP_ASSERT(length % this->MandatoryBlockSize() == 0); - // If this assert fires, then outString == inString. You could experience a - // performance hit. Also see https://github.com/weidai11/cryptopp/issues/1088' - CRYPTOPP_ASSERT(outString != inString); - PolicyInterface &policy = this->AccessPolicy(); - unsigned int bytesPerIteration = policy.GetBytesPerIteration(); - - byte* savedOutString = outString; - size_t savedLength = length; - bool copyOut = false; - - if (inString == outString) - { - // No need to copy inString to outString. - // Just allocate the space. - m_tempOutString.New(length); - m_tempOutString.SetMark(0); - outString = m_tempOutString.BytePtr(); - copyOut = true; - } + size_t bytesPerIteration = policy.GetBytesPerIteration(); if (m_leftOver > 0) { @@ -124,13 +106,9 @@ void AdditiveCipherTemplate::ProcessData(byte *outString, const byte *inStrin length -= len; m_leftOver -= len; } - if (!length) { - if (copyOut) - std::memcpy(savedOutString, m_tempOutString.BytePtr(), savedLength); - return; - } + if (!length) { return; } - const unsigned int alignment = policy.GetAlignment(); + const word32 alignment = policy.GetAlignment(); const bool inAligned = IsAlignedOn(inString, alignment); const bool outAligned = IsAlignedOn(outString, alignment); CRYPTOPP_UNUSED(inAligned); CRYPTOPP_UNUSED(outAligned); @@ -143,6 +121,10 @@ void AdditiveCipherTemplate::ProcessData(byte *outString, const byte *inStrin KeystreamOperation operation = KeystreamOperation(flags); policy.OperateKeystream(operation, outString, inString, iterations); + // Try to tame the optimizer. This is GH #683, #1010, and #1088. + volatile byte* unused = const_cast(outString); + CRYPTOPP_UNUSED(unused); + inString = PtrAdd(inString, iterations * bytesPerIteration); outString = PtrAdd(outString, iterations * bytesPerIteration); length -= iterations * bytesPerIteration; @@ -171,9 +153,6 @@ void AdditiveCipherTemplate::ProcessData(byte *outString, const byte *inStrin m_leftOver = bufferByteSize - length; } - - if (copyOut) - std::memcpy(savedOutString, m_tempOutString.BytePtr(), savedLength); } template @@ -244,28 +223,10 @@ void CFB_CipherTemplate::ProcessData(byte *outString, const byte *inString CRYPTOPP_ASSERT(outString); CRYPTOPP_ASSERT(inString); CRYPTOPP_ASSERT(length % this->MandatoryBlockSize() == 0); - // If this assert fires, then outString == inString. You could experience a - // performance hit. Also see https://github.com/weidai11/cryptopp/issues/1088' - CRYPTOPP_ASSERT(outString != inString); - PolicyInterface &policy = this->AccessPolicy(); unsigned int bytesPerIteration = policy.GetBytesPerIteration(); byte *reg = policy.GetRegisterBegin(); - byte* savedOutString = outString; - size_t savedLength = length; - bool copyOut = false; - - if (inString == outString) - { - // No need to copy inString to outString. - // Just allocate the space. - m_tempOutString.New(length); - m_tempOutString.SetMark(0); - outString = m_tempOutString.BytePtr(); - copyOut = true; - } - if (m_leftOver) { const size_t len = STDMIN(m_leftOver, length); @@ -276,13 +237,9 @@ void CFB_CipherTemplate::ProcessData(byte *outString, const byte *inString m_leftOver -= len; length -= len; } - if (!length) { - if (copyOut) - std::memcpy(savedOutString, m_tempOutString.BytePtr(), savedLength); - return; - } + if (!length) { return; } - const unsigned int alignment = policy.GetAlignment(); + const word32 alignment = policy.GetAlignment(); const bool inAligned = IsAlignedOn(inString, alignment); const bool outAligned = IsAlignedOn(outString, alignment); CRYPTOPP_UNUSED(inAligned); CRYPTOPP_UNUSED(outAligned); @@ -292,6 +249,10 @@ void CFB_CipherTemplate::ProcessData(byte *outString, const byte *inString CipherDir cipherDir = GetCipherDir(*this); policy.Iterate(outString, inString, cipherDir, length / bytesPerIteration); + // Try to tame the optimizer. This is GH #683, #1010, and #1088. + volatile byte* unused = const_cast(outString); + CRYPTOPP_UNUSED(unused); + const size_t remainder = length % bytesPerIteration; inString = PtrAdd(inString, length - remainder); outString = PtrAdd(outString, length - remainder); @@ -314,9 +275,6 @@ void CFB_CipherTemplate::ProcessData(byte *outString, const byte *inString CombineMessageAndShiftRegister(outString, reg, inString, length); m_leftOver = bytesPerIteration - length; } - - if (copyOut) - std::memcpy(savedOutString, m_tempOutString.BytePtr(), savedLength); } template diff --git a/strciphr.h b/strciphr.h index ad905eeb..710944fc 100644 --- a/strciphr.h +++ b/strciphr.h @@ -394,8 +394,7 @@ protected: inline byte * KeystreamBufferBegin() {return this->m_buffer.data();} inline byte * KeystreamBufferEnd() {return (PtrAdd(this->m_buffer.data(), this->m_buffer.size()));} - // m_tempOutString added due to GH #1010 - AlignedSecByteBlock m_buffer, m_tempOutString; + AlignedSecByteBlock m_buffer; size_t m_leftOver; }; @@ -646,8 +645,6 @@ protected: void UncheckedSetKey(const byte *key, unsigned int length, const NameValuePairs ¶ms); - // m_tempOutString added due to GH #1010 - AlignedSecByteBlock m_tempOutString; size_t m_leftOver; };