Rewrite BLAKE2 classes

The ParameterBlocks for BLAKE2 had undefined behavior. We relied on the compiler packing the bytes in the structure, then we used the first byte as the start of an array.

This rewrite does things correctly. We don't memset the structure, and we don't treat the structure as a contiguous array.
This commit is contained in:
Jeffrey Walton 2018-11-20 23:32:35 -05:00
parent 13bee44388
commit a65d55a3fd
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
4 changed files with 492 additions and 430 deletions

View File

@ -172,194 +172,24 @@ extern void BLAKE2_Compress32_ALTIVEC(const byte* input, BLAKE2s_State& state);
extern void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2b_State& state);
#endif
BLAKE2s_ParameterBlock::BLAKE2s_ParameterBlock(size_t digestLen, size_t keyLen,
const byte* saltStr, size_t saltLen,
const byte* personalizationStr, size_t personalizationLen)
unsigned int BLAKE2b::OptimalDataAlignment() const
{
digestLength = (byte)digestLen;
keyLength = (byte)keyLen;
fanout = depth = 1;
nodeDepth = innerLength = 0;
std::memset(leafLength, 0x00, COUNTOF(leafLength));
std::memset(nodeOffset, 0x00, COUNTOF(nodeOffset));
if (saltStr && saltLen)
{
memcpy_s(salt, COUNTOF(salt), saltStr, saltLen);
size_t rem = SaturatingSubtract(COUNTOF(salt), saltLen);
size_t off = COUNTOF(salt) - rem;
if (rem)
std::memset(salt+off, 0x00, rem);
}
#if defined(CRYPTOPP_SSE41_AVAILABLE)
if (HasSSE41())
return 16;
else
{
std::memset(salt, 0x00, COUNTOF(salt));
}
if (personalizationStr && personalizationLen)
{
memcpy_s(personalization, COUNTOF(personalization), personalizationStr, personalizationLen);
size_t rem = SaturatingSubtract(COUNTOF(personalization), personalizationLen);
size_t off = COUNTOF(personalization) - rem;
if (rem)
std::memset(personalization+off, 0x00, rem);
}
#endif
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
if (HasNEON())
return 4;
else
{
std::memset(personalization, 0x00, COUNTOF(personalization));
}
}
BLAKE2b_ParameterBlock::BLAKE2b_ParameterBlock(size_t digestLen, size_t keyLen,
const byte* saltStr, size_t saltLen,
const byte* personalizationStr, size_t personalizationLen)
{
digestLength = (byte)digestLen;
keyLength = (byte)keyLen;
fanout = depth = 1;
nodeDepth = innerLength = 0;
std::memset(rfu, 0x00, COUNTOF(rfu));
std::memset(leafLength, 0x00, COUNTOF(leafLength));
std::memset(nodeOffset, 0x00, COUNTOF(nodeOffset));
if (saltStr && saltLen)
{
memcpy_s(salt, COUNTOF(salt), saltStr, saltLen);
size_t rem = SaturatingSubtract(COUNTOF(salt), saltLen);
size_t off = COUNTOF(salt) - rem;
if (rem)
std::memset(salt+off, 0x00, rem);
}
#endif
#if (CRYPTOPP_POWER8_AVAILABLE)
if (HasPower8())
return 16;
else
{
std::memset(salt, 0x00, COUNTOF(salt));
}
if (personalizationStr && personalizationLen)
{
memcpy_s(personalization, COUNTOF(personalization), personalizationStr, personalizationLen);
size_t rem = SaturatingSubtract(COUNTOF(personalization), personalizationLen);
size_t off = COUNTOF(personalization) - rem;
if (rem)
std::memset(personalization+off, 0x00, rem);
}
else
{
std::memset(personalization, 0x00, COUNTOF(personalization));
}
}
void BLAKE2s::UncheckedSetKey(const byte *key, unsigned int length, const CryptoPP::NameValuePairs& params)
{
if (key && length)
{
AlignedSecByteBlock temp(BLOCKSIZE);
memcpy_s(temp, BLOCKSIZE, key, length);
size_t rem = SaturatingSubtract((unsigned int)BLOCKSIZE, length);
if (rem)
std::memset(temp+length, 0x00, rem);
m_key.swap(temp);
}
else
{
m_key.resize(0);
}
ParameterBlock& block = *m_block.data();
std::memset(block.leafLength, 0x00, COUNTOF(block.leafLength));
std::memset(block.nodeOffset, 0x00, COUNTOF(block.nodeOffset));
block.nodeDepth = block.innerLength = 0;
block.keyLength = (byte)length;
block.digestLength = (byte)params.GetIntValueWithDefault(Name::DigestSize(), DIGESTSIZE);
block.fanout = block.depth = 1;
ConstByteArrayParameter t;
if (params.GetValue(Name::Salt(), t) && t.begin() && t.size())
{
memcpy_s(block.salt, COUNTOF(block.salt), t.begin(), t.size());
size_t rem = SaturatingSubtract(COUNTOF(block.salt), t.size());
size_t off = COUNTOF(block.salt) - rem;
if (rem)
std::memset(block.salt+off, 0x00, rem);
}
else
{
std::memset(block.salt, 0x00, COUNTOF(block.salt));
}
if (params.GetValue(Name::Personalization(), t) && t.begin() && t.size())
{
memcpy_s(block.personalization, COUNTOF(block.personalization), t.begin(), t.size());
size_t rem = SaturatingSubtract(COUNTOF(block.personalization), t.size());
size_t off = COUNTOF(block.personalization) - rem;
if (rem)
std::memset(block.personalization+off, 0x00, rem);
}
else
{
std::memset(block.personalization, 0x00, COUNTOF(block.personalization));
}
}
void BLAKE2b::UncheckedSetKey(const byte *key, unsigned int length, const CryptoPP::NameValuePairs& params)
{
if (key && length)
{
AlignedSecByteBlock temp(BLOCKSIZE);
memcpy_s(temp, BLOCKSIZE, key, length);
size_t rem = SaturatingSubtract((unsigned int)BLOCKSIZE, length);
if (rem)
std::memset(temp+length, 0x00, rem);
m_key.swap(temp);
}
else
{
m_key.resize(0);
}
ParameterBlock& block = *m_block.data();
std::memset(block.leafLength, 0x00, COUNTOF(block.leafLength));
std::memset(block.nodeOffset, 0x00, COUNTOF(block.nodeOffset));
std::memset(block.rfu, 0x00, COUNTOF(block.rfu));
block.nodeDepth = block.innerLength = 0;
block.keyLength = (byte)length;
block.digestLength = (byte)params.GetIntValueWithDefault(Name::DigestSize(), DIGESTSIZE);
block.fanout = block.depth = 1;
ConstByteArrayParameter t;
if (params.GetValue(Name::Salt(), t) && t.begin() && t.size())
{
memcpy_s(block.salt, COUNTOF(block.salt), t.begin(), t.size());
size_t rem = SaturatingSubtract(COUNTOF(block.salt), t.size());
size_t off = COUNTOF(block.salt) - rem;
if (rem)
std::memset(block.salt+off, 0x00, rem);
}
else
{
std::memset(block.salt, 0x00, COUNTOF(block.salt));
}
if (params.GetValue(Name::Personalization(), t) && t.begin() && t.size())
{
memcpy_s(block.personalization, COUNTOF(block.personalization), t.begin(), t.size());
size_t rem = SaturatingSubtract(COUNTOF(block.personalization), t.size());
size_t off = COUNTOF(block.personalization) - rem;
if (rem)
std::memset(block.personalization+off, 0x00, rem);
}
else
{
std::memset(block.personalization, 0x00, COUNTOF(block.personalization));
}
#endif
return GetAlignmentOf<word32>();
}
std::string BLAKE2b::AlgorithmProvider() const
@ -367,225 +197,387 @@ std::string BLAKE2b::AlgorithmProvider() const
#if defined(CRYPTOPP_SSE41_AVAILABLE)
if (HasSSE41())
return "SSE4.1";
else
#endif
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
if (HasNEON())
return "NEON";
else
#endif
#if (CRYPTOPP_POWER8_AVAILABLE)
if (HasPower8())
return "Power8";
else
#endif
return "C++";
}
unsigned int BLAKE2s::OptimalDataAlignment() const
{
#if defined(CRYPTOPP_SSE41_AVAILABLE)
if (HasSSE41())
return 16;
else
#endif
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
if (HasNEON())
return 4;
else
#endif
#if (CRYPTOPP_POWER7_AVAILABLE)
if (HasPower7())
return 16;
else
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
if (HasAltivec())
return 16;
else
#endif
return GetAlignmentOf<word32>();
}
std::string BLAKE2s::AlgorithmProvider() const
{
#if defined(CRYPTOPP_SSE41_AVAILABLE)
if (HasSSE41())
return "SSE4.1";
else
#endif
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
if (HasNEON())
return "NEON";
else
#endif
#if (CRYPTOPP_POWER7_AVAILABLE)
if (HasPower7())
return "Power7";
else
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
if (HasAltivec())
return "Altivec";
else
#endif
return "C++";
}
BLAKE2s::BLAKE2s(bool treeMode, unsigned int digestSize) : m_state(1), m_block(1), m_digestSize(digestSize), m_treeMode(treeMode)
void BLAKE2s_State::Reset()
{
CRYPTOPP_ASSERT(digestSize <= DIGESTSIZE);
UncheckedSetKey(NULLPTR, 0, MakeParameters(Name::DigestSize(), (int)digestSize)(Name::TreeMode(), treeMode, false));
Restart();
std::memset(m_hft, 0x00, m_hft.SizeInBytes());
m_len = 0;
}
BLAKE2b::BLAKE2b(bool treeMode, unsigned int digestSize) : m_state(1), m_block(1), m_digestSize(digestSize), m_treeMode(treeMode)
void BLAKE2b_State::Reset()
{
std::memset(m_hft, 0x00, m_hft.SizeInBytes());
m_len = 0;
}
BLAKE2s_ParameterBlock::BLAKE2s_ParameterBlock(size_t digestLen, size_t keyLen,
const byte* saltStr, size_t saltLen,
const byte* personalizationStr, size_t personalizationLen)
{
Reset(digestLen, keyLen);
if (saltStr && saltLen)
memcpy_s(salt(), SALTSIZE, saltStr, saltLen);
if (personalizationStr && personalizationLen)
memcpy_s(personalization(), PERSONALIZATIONSIZE, personalizationStr, personalizationLen);
}
BLAKE2b_ParameterBlock::BLAKE2b_ParameterBlock(size_t digestLen, size_t keyLen,
const byte* saltStr, size_t saltLen,
const byte* personalizationStr, size_t personalizationLen)
{
Reset(digestLen, keyLen);
if (saltStr && saltLen)
memcpy_s(salt(), SALTSIZE, saltStr, saltLen);
if (personalizationStr && personalizationLen)
memcpy_s(personalization(), PERSONALIZATIONSIZE, personalizationStr, personalizationLen);
}
void BLAKE2s_ParameterBlock::Reset(size_t digestLen, size_t keyLen)
{
std::memset(m_data, 0x00, m_data.size());
m_data[DigestOff] = static_cast<byte>(digestLen);
m_data[KeyOff] = static_cast<byte>(keyLen);
m_data[FanoutOff] = m_data[DepthOff] = 1;
}
void BLAKE2b_ParameterBlock::Reset(size_t digestLen, size_t keyLen)
{
std::memset(m_data, 0x00, m_data.size());
m_data[DigestOff] = static_cast<byte>(digestLen);
m_data[KeyOff] = static_cast<byte>(keyLen);
m_data[FanoutOff] = m_data[DepthOff] = 1;
}
BLAKE2s::BLAKE2s(bool treeMode, unsigned int digestSize)
: m_digestSize(digestSize), m_keyLength(0), m_treeMode(treeMode)
{
CRYPTOPP_ASSERT(digestSize <= DIGESTSIZE);
UncheckedSetKey(NULLPTR, 0, MakeParameters(Name::DigestSize(), (int)digestSize)(Name::TreeMode(), treeMode, false));
Restart();
UncheckedSetKey(NULLPTR, 0, MakeParameters
(Name::DigestSize(), (int)digestSize)
(Name::TreeMode(), treeMode));
}
BLAKE2b::BLAKE2b(bool treeMode, unsigned int digestSize)
: m_digestSize(digestSize), m_keyLength(0), m_treeMode(treeMode)
{
CRYPTOPP_ASSERT(digestSize <= DIGESTSIZE);
UncheckedSetKey(NULLPTR, 0, MakeParameters
(Name::DigestSize(), (int)digestSize)
(Name::TreeMode(), treeMode));
}
BLAKE2s::BLAKE2s(const byte *key, size_t keyLength, const byte* salt, size_t saltLength,
const byte* personalization, size_t personalizationLength, bool treeMode, unsigned int digestSize)
: m_state(1), m_block(1), m_digestSize(digestSize), m_treeMode(treeMode)
: m_digestSize(digestSize), m_keyLength(keyLength), m_treeMode(treeMode)
{
CRYPTOPP_ASSERT(keyLength <= MAX_KEYLENGTH);
CRYPTOPP_ASSERT(digestSize <= DIGESTSIZE);
CRYPTOPP_ASSERT(saltLength <= SALTSIZE);
CRYPTOPP_ASSERT(personalizationLength <= PERSONALIZATIONSIZE);
UncheckedSetKey(key, static_cast<unsigned int>(keyLength), MakeParameters(Name::DigestSize(),(int)digestSize)(Name::TreeMode(),treeMode, false)
(Name::Salt(), ConstByteArrayParameter(salt, saltLength))(Name::Personalization(), ConstByteArrayParameter(personalization, personalizationLength)));
Restart();
UncheckedSetKey(key, static_cast<unsigned int>(keyLength), MakeParameters
(Name::DigestSize(),(int)digestSize)
(Name::TreeMode(),treeMode)
(Name::Salt(), ConstByteArrayParameter(salt, saltLength))
(Name::Personalization(), ConstByteArrayParameter(personalization, personalizationLength)));
}
BLAKE2b::BLAKE2b(const byte *key, size_t keyLength, const byte* salt, size_t saltLength,
const byte* personalization, size_t personalizationLength, bool treeMode, unsigned int digestSize)
: m_state(1), m_block(1), m_digestSize(digestSize), m_treeMode(treeMode)
: m_digestSize(digestSize), m_keyLength(keyLength), m_treeMode(treeMode)
{
CRYPTOPP_ASSERT(keyLength <= MAX_KEYLENGTH);
CRYPTOPP_ASSERT(digestSize <= DIGESTSIZE);
CRYPTOPP_ASSERT(saltLength <= SALTSIZE);
CRYPTOPP_ASSERT(personalizationLength <= PERSONALIZATIONSIZE);
UncheckedSetKey(key, static_cast<unsigned int>(keyLength), MakeParameters(Name::DigestSize(),(int)digestSize)(Name::TreeMode(),treeMode, false)
(Name::Salt(), ConstByteArrayParameter(salt, saltLength))(Name::Personalization(), ConstByteArrayParameter(personalization, personalizationLength)));
UncheckedSetKey(key, static_cast<unsigned int>(keyLength), MakeParameters
(Name::DigestSize(),(int)digestSize)
(Name::TreeMode(),treeMode)
(Name::Salt(), ConstByteArrayParameter(salt, saltLength))
(Name::Personalization(), ConstByteArrayParameter(personalization, personalizationLength)));
}
void BLAKE2s::UncheckedSetKey(const byte *key, unsigned int length, const CryptoPP::NameValuePairs& params)
{
if (key && length)
{
m_key.New(BLOCKSIZE);
std::memcpy(m_key, key, length);
std::memset(m_key + length, 0x00, BLOCKSIZE - length);
m_keyLength = length;
}
else
{
m_key.resize(0);
m_keyLength = 0;
}
m_digestSize = params.GetIntValueWithDefault(Name::DigestSize(), DIGESTSIZE);
m_state.Reset();
m_block.Reset(m_digestSize, m_keyLength);
(void)params.GetValue(Name::TreeMode(), m_treeMode);
ConstByteArrayParameter t;
if (params.GetValue(Name::Salt(), t) && t.begin() && t.size())
memcpy_s(m_block.salt(), SALTSIZE, t.begin(), t.size());
if (params.GetValue(Name::Personalization(), t) && t.begin() && t.size())
memcpy_s(m_block.personalization(), PERSONALIZATIONSIZE, t.begin(), t.size());
Restart();
}
void BLAKE2b::UncheckedSetKey(const byte *key, unsigned int length, const CryptoPP::NameValuePairs& params)
{
if (key && length)
{
m_key.New(BLOCKSIZE);
std::memcpy(m_key, key, length);
std::memset(m_key + length, 0x00, BLOCKSIZE - length);
m_keyLength = length;
}
else
{
m_key.resize(0);
m_keyLength = 0;
}
m_digestSize = params.GetIntValueWithDefault(Name::DigestSize(), DIGESTSIZE);
m_state.Reset();
m_block.Reset(m_digestSize, m_keyLength);
(void)params.GetValue(Name::TreeMode(), m_treeMode);
ConstByteArrayParameter t;
if (params.GetValue(Name::Salt(), t) && t.begin() && t.size())
memcpy_s(m_block.salt(), SALTSIZE, t.begin(), t.size());
if (params.GetValue(Name::Personalization(), t) && t.begin() && t.size())
memcpy_s(m_block.personalization(), PERSONALIZATIONSIZE, t.begin(), t.size());
Restart();
}
void BLAKE2s::Restart()
{
static const word32 zero[2] = {0,0};
Restart(*m_block.data(), zero);
Restart(m_block, zero);
}
void BLAKE2b::Restart()
{
static const word64 zero[2] = {0,0};
Restart(*m_block.data(), zero);
Restart(m_block, zero);
}
void BLAKE2s::Restart(const BLAKE2s_ParameterBlock& block, const word32 counter[2])
{
// We take a parameter block as a parameter to allow customized state.
// Avoid the copy of the parameter block when we are passing our own block.
if (&block != m_block.data())
{
memcpy_s(m_block.data(), sizeof(ParameterBlock), &block, sizeof(ParameterBlock));
m_block.data()->digestLength = (byte)m_digestSize;
m_block.data()->keyLength = (byte)m_key.size();
}
State& state = *m_state.data();
state.tf[0] = state.tf[1] = 0, state.tf[2] = state.tf[3] = 0, state.length = 0;
// We take a counter as a parameter to allow customized state.
m_state.Reset();
if (counter != NULLPTR)
{
state.tf[0] = counter[0];
state.tf[1] = counter[1];
word32* t = m_state.t();
t[0] = counter[0];
t[1] = counter[1];
}
// We take a parameter block as a parameter to allow customized state.
// Avoid the copy of the parameter block when we are passing our own block.
if (block.data() == m_block.data())
m_block.Reset(m_digestSize, m_keyLength);
else
{
std::memcpy(m_block.data(), block.data(), m_block.size());
m_block.m_data[BLAKE2s_ParameterBlock::DigestOff] = (byte)m_digestSize;
m_block.m_data[BLAKE2s_ParameterBlock::KeyOff] = (byte)m_keyLength;
}
const word32* iv = BLAKE2S_IV;
PutBlock<word32, LittleEndian, true> put(m_block.data(), &state.h[0]);
PutBlock<word32, LittleEndian, true> put(m_block.data(), m_state.h());
put(iv[0])(iv[1])(iv[2])(iv[3])(iv[4])(iv[5])(iv[6])(iv[7]);
// When BLAKE2 is keyed, the input stream is simply {key||message}. Key it
// during Restart to avoid FirstPut and friends. Key size == 0 means no key.
if (m_key.size())
Update(m_key, m_key.size());
// When BLAKE2 is keyed, the input stream is simply {key || 0 || message}.
// The key is padded to a full Blocksize with 0. Key it during Restart to
// avoid FirstPut and friends. Key size == 0 means no key.
if (m_keyLength)
Update(m_key, BLOCKSIZE);
}
void BLAKE2b::Restart(const BLAKE2b_ParameterBlock& block, const word64 counter[2])
{
// We take a parameter block as a parameter to allow customized state.
// Avoid the copy of the parameter block when we are passing our own block.
if (&block != m_block.data())
{
memcpy_s(m_block.data(), sizeof(ParameterBlock), &block, sizeof(ParameterBlock));
m_block.data()->digestLength = (byte)m_digestSize;
m_block.data()->keyLength = (byte)m_key.size();
}
State& state = *m_state.data();
state.tf[0] = state.tf[1] = 0, state.tf[2] = state.tf[3] = 0, state.length = 0;
// We take a counter as a parameter to allow customized state.
m_state.Reset();
if (counter != NULLPTR)
{
state.tf[0] = counter[0];
state.tf[1] = counter[1];
word64* t = m_state.t();
t[0] = counter[0];
t[1] = counter[1];
}
// We take a parameter block as a parameter to allow customized state.
// Avoid the copy of the parameter block when we are passing our own block.
if (block.data() == m_block.data())
m_block.Reset(m_digestSize, m_keyLength);
else
{
std::memcpy(m_block.data(), block.data(), m_block.size());
m_block.m_data[BLAKE2b_ParameterBlock::DigestOff] = (byte)m_digestSize;
m_block.m_data[BLAKE2b_ParameterBlock::KeyOff] = (byte)m_keyLength;
}
const word64* iv = BLAKE2B_IV;
PutBlock<word64, LittleEndian, true> put(m_block.data(), &state.h[0]);
PutBlock<word64, LittleEndian, true> put(m_block.data(), m_state.h());
put(iv[0])(iv[1])(iv[2])(iv[3])(iv[4])(iv[5])(iv[6])(iv[7]);
// When BLAKE2 is keyed, the input stream is simply {key||message}. Key it
// during Restart to avoid FirstPut and friends. Key size == 0 means no key.
if (m_key.size())
Update(m_key, m_key.size());
// When BLAKE2 is keyed, the input stream is simply {key || 0 || message}.
// The key is padded to a full Blocksize with 0. Key it during Restart to
// avoid FirstPut and friends. Key size == 0 means no key.
if (m_keyLength)
Update(m_key, BLOCKSIZE);
}
void BLAKE2s::Update(const byte *input, size_t length)
{
CRYPTOPP_ASSERT(!(input == NULLPTR && length != 0));
if (length == 0) { return; }
CRYPTOPP_ASSERT(input != NULLPTR || length == 0);
State& state = *m_state.data();
if (state.length + length > BLOCKSIZE)
if (length > BLOCKSIZE - m_state.m_len)
{
// Complete current block
const size_t fill = BLOCKSIZE - state.length;
memcpy_s(&state.buffer[state.length], fill, input, fill);
if (m_state.m_len != 0)
{
// Complete current block
const size_t fill = BLOCKSIZE - m_state.m_len;
std::memcpy(m_state.m_buf+m_state.m_len, input, fill);
IncrementCounter();
Compress(state.buffer);
state.length = 0;
IncrementCounter(BLOCKSIZE);
Compress(m_state.m_buf);
m_state.m_len = 0;
length -= fill, input += fill;
length -= fill, input += fill;
}
// Compress in-place to avoid copies
while (length > BLOCKSIZE)
{
IncrementCounter();
IncrementCounter(BLOCKSIZE);
Compress(input);
length -= BLOCKSIZE, input += BLOCKSIZE;
}
}
// Copy tail bytes
if (input && length)
if (length)
{
CRYPTOPP_ASSERT(length <= BLOCKSIZE - state.length);
memcpy_s(&state.buffer[state.length], length, input, length);
state.length += static_cast<unsigned int>(length);
CRYPTOPP_ASSERT(length <= BLOCKSIZE - m_state.m_len);
std::memcpy(m_state.m_buf+m_state.m_len, input, length);
m_state.m_len += static_cast<unsigned int>(length);
}
}
void BLAKE2b::Update(const byte *input, size_t length)
{
CRYPTOPP_ASSERT(!(input == NULLPTR && length != 0));
if (length == 0) { return; }
CRYPTOPP_ASSERT(input != NULLPTR || length == 0);
State& state = *m_state.data();
if (state.length + length > BLOCKSIZE)
if (length > BLOCKSIZE - m_state.m_len)
{
// Complete current block
const size_t fill = BLOCKSIZE - state.length;
memcpy_s(&state.buffer[state.length], fill, input, fill);
if (m_state.m_len != 0)
{
// Complete current block
const size_t fill = BLOCKSIZE - m_state.m_len;
std::memcpy(m_state.m_buf+m_state.m_len, input, fill);
IncrementCounter();
Compress(state.buffer);
state.length = 0;
IncrementCounter(BLOCKSIZE);
Compress(m_state.m_buf);
m_state.m_len = 0;
length -= fill, input += fill;
length -= fill, input += fill;
}
// Compress in-place to avoid copies
while (length > BLOCKSIZE)
{
IncrementCounter();
CRYPTOPP_ASSERT(m_state.m_len == 0);
IncrementCounter(BLOCKSIZE);
Compress(input);
length -= BLOCKSIZE, input += BLOCKSIZE;
}
}
// Copy tail bytes
if (input && length)
if (length)
{
CRYPTOPP_ASSERT(length <= BLOCKSIZE - state.length);
memcpy_s(&state.buffer[state.length], length, input, length);
state.length += static_cast<unsigned int>(length);
CRYPTOPP_ASSERT(length <= BLOCKSIZE - m_state.m_len);
std::memcpy(m_state.m_buf + m_state.m_len, input, length);
m_state.m_len += static_cast<unsigned int>(length);
}
}
@ -593,23 +585,23 @@ void BLAKE2s::TruncatedFinal(byte *hash, size_t size)
{
CRYPTOPP_ASSERT(hash != NULLPTR);
this->ThrowIfInvalidTruncatedSize(size);
word32* f = m_state.f();
// Set last block unconditionally
State& state = *m_state.data();
state.tf[2] = ~static_cast<word32>(0);
f[0] = ~static_cast<word32>(0);
// Set last node if tree mode
if (m_treeMode)
state.tf[3] = ~static_cast<word32>(0);
f[1] = ~static_cast<word32>(0);
// Increment counter for tail bytes only
IncrementCounter(state.length);
IncrementCounter(m_state.m_len);
std::memset(state.buffer + state.length, 0x00, BLOCKSIZE - state.length);
Compress(state.buffer);
std::memset(m_state.m_buf + m_state.m_len, 0x00, BLOCKSIZE - m_state.m_len);
Compress(m_state.m_buf);
// Copy to caller buffer
memcpy_s(hash, size, &state.h[0], size);
std::memcpy(hash, m_state.h(), size);
Restart();
}
@ -618,39 +610,39 @@ void BLAKE2b::TruncatedFinal(byte *hash, size_t size)
{
CRYPTOPP_ASSERT(hash != NULLPTR);
this->ThrowIfInvalidTruncatedSize(size);
word64* f = m_state.f();
// Set last block unconditionally
State& state = *m_state.data();
state.tf[2] = ~static_cast<word64>(0);
f[0] = ~static_cast<word64>(0);
// Set last node if tree mode
if (m_treeMode)
state.tf[3] = ~static_cast<word64>(0);
f[1] = ~static_cast<word64>(0);
// Increment counter for tail bytes only
IncrementCounter(state.length);
IncrementCounter(m_state.m_len);
std::memset(state.buffer + state.length, 0x00, BLOCKSIZE - state.length);
Compress(state.buffer);
std::memset(m_state.m_buf + m_state.m_len, 0x00, BLOCKSIZE - m_state.m_len);
Compress(m_state.m_buf);
// Copy to caller buffer
memcpy_s(hash, size, &state.h[0], size);
std::memcpy(hash, m_state.h(), size);
Restart();
}
void BLAKE2s::IncrementCounter(size_t count)
{
State& state = *m_state.data();
state.tf[0] += static_cast<word32>(count);
state.tf[1] += !!(state.tf[0] < count);
word32* t = m_state.t();
t[0] += static_cast<word32>(count);
t[1] += !!(t[0] < count);
}
void BLAKE2b::IncrementCounter(size_t count)
{
State& state = *m_state.data();
state.tf[0] += static_cast<word64>(count);
state.tf[1] += !!(state.tf[0] < count);
word64* t = m_state.t();
t[0] += static_cast<word64>(count);
t[1] += !!(t[0] < count);
}
void BLAKE2s::Compress(const byte *input)
@ -658,27 +650,27 @@ void BLAKE2s::Compress(const byte *input)
#if CRYPTOPP_SSE41_AVAILABLE
if(HasSSE41())
{
return BLAKE2_Compress32_SSE4(input, *m_state.data());
return BLAKE2_Compress32_SSE4(input, m_state);
}
#endif
#if CRYPTOPP_ARM_NEON_AVAILABLE
if(HasNEON())
{
return BLAKE2_Compress32_NEON(input, *m_state.data());
return BLAKE2_Compress32_NEON(input, m_state);
}
#endif
#if CRYPTOPP_POWER7_AVAILABLE
if(HasPower7())
{
return BLAKE2_Compress32_POWER7(input, *m_state.data());
return BLAKE2_Compress32_POWER7(input, m_state);
}
#elif CRYPTOPP_ALTIVEC_AVAILABLE
if(HasAltivec())
{
return BLAKE2_Compress32_ALTIVEC(input, *m_state.data());
return BLAKE2_Compress32_ALTIVEC(input, m_state);
}
#endif
return BLAKE2_Compress32_CXX(input, *m_state.data());
return BLAKE2_Compress32_CXX(input, m_state);
}
void BLAKE2b::Compress(const byte *input)
@ -686,22 +678,22 @@ void BLAKE2b::Compress(const byte *input)
#if CRYPTOPP_SSE41_AVAILABLE
if(HasSSE41())
{
return BLAKE2_Compress64_SSE4(input, *m_state.data());
return BLAKE2_Compress64_SSE4(input, m_state);
}
#endif
#if CRYPTOPP_ARM_NEON_AVAILABLE
if(HasNEON())
{
return BLAKE2_Compress64_NEON(input, *m_state.data());
return BLAKE2_Compress64_NEON(input, m_state);
}
#endif
#if CRYPTOPP_POWER8_AVAILABLE
if(HasPower8())
{
return BLAKE2_Compress64_POWER8(input, *m_state.data());
return BLAKE2_Compress64_POWER8(input, m_state);
}
#endif
return BLAKE2_Compress64_CXX(input, *m_state.data());
return BLAKE2_Compress64_CXX(input, m_state);
}
void BLAKE2_Compress64_CXX(const byte* input, BLAKE2b_State& state)
@ -711,18 +703,19 @@ void BLAKE2_Compress64_CXX(const byte* input, BLAKE2b_State& state)
GetBlock<word64, LittleEndian, true> get1(input);
get1(m[0])(m[1])(m[2])(m[3])(m[4])(m[5])(m[6])(m[7])(m[8])(m[9])(m[10])(m[11])(m[12])(m[13])(m[14])(m[15]);
GetBlock<word64, LittleEndian, true> get2(&state.h[0]);
GetBlock<word64, LittleEndian, true> get2(state.h());
get2(v[0])(v[1])(v[2])(v[3])(v[4])(v[5])(v[6])(v[7]);
const word64* iv = BLAKE2B_IV;
const word64* tf = state.t();
v[ 8] = iv[0];
v[ 9] = iv[1];
v[10] = iv[2];
v[11] = iv[3];
v[12] = state.tf[0] ^ iv[4];
v[13] = state.tf[1] ^ iv[5];
v[14] = state.tf[2] ^ iv[6];
v[15] = state.tf[3] ^ iv[7];
v[12] = tf[0] ^ iv[4];
v[13] = tf[1] ^ iv[5];
v[14] = tf[2] ^ iv[6];
v[15] = tf[3] ^ iv[7];
BLAKE2B_ROUND<0>(m, v);
BLAKE2B_ROUND<1>(m, v);
@ -737,8 +730,9 @@ void BLAKE2_Compress64_CXX(const byte* input, BLAKE2b_State& state)
BLAKE2B_ROUND<10>(m, v);
BLAKE2B_ROUND<11>(m, v);
for(unsigned int i = 0; i < 8; ++i)
state.h[i] = state.h[i] ^ ConditionalByteReverse(LittleEndian::ToEnum(), v[i] ^ v[i + 8]);
word64* h = state.h();
for (unsigned int i = 0; i < 8; ++i)
h[i] = h[i] ^ ConditionalByteReverse(LITTLE_ENDIAN_ORDER, v[i] ^ v[i + 8]);
}
void BLAKE2_Compress32_CXX(const byte* input, BLAKE2s_State& state)
@ -748,18 +742,19 @@ void BLAKE2_Compress32_CXX(const byte* input, BLAKE2s_State& state)
GetBlock<word32, LittleEndian, true> get1(input);
get1(m[0])(m[1])(m[2])(m[3])(m[4])(m[5])(m[6])(m[7])(m[8])(m[9])(m[10])(m[11])(m[12])(m[13])(m[14])(m[15]);
GetBlock<word32, LittleEndian, true> get2(&state.h[0]);
GetBlock<word32, LittleEndian, true> get2(state.h());
get2(v[0])(v[1])(v[2])(v[3])(v[4])(v[5])(v[6])(v[7]);
const word32* iv = BLAKE2S_IV;
const word32* tf = state.t();
v[ 8] = iv[0];
v[ 9] = iv[1];
v[10] = iv[2];
v[11] = iv[3];
v[12] = state.tf[0] ^ iv[4];
v[13] = state.tf[1] ^ iv[5];
v[14] = state.tf[2] ^ iv[6];
v[15] = state.tf[3] ^ iv[7];
v[12] = tf[0] ^ iv[4];
v[13] = tf[1] ^ iv[5];
v[14] = tf[2] ^ iv[6];
v[15] = tf[3] ^ iv[7];
BLAKE2S_ROUND<0>(m, v);
BLAKE2S_ROUND<1>(m, v);
@ -772,8 +767,9 @@ void BLAKE2_Compress32_CXX(const byte* input, BLAKE2s_State& state)
BLAKE2S_ROUND<8>(m, v);
BLAKE2S_ROUND<9>(m, v);
for(unsigned int i = 0; i < 8; ++i)
state.h[i] = state.h[i] ^ ConditionalByteReverse(LittleEndian::ToEnum(), v[i] ^ v[i + 8]);
word32* h = state.h();
for (unsigned int i = 0; i < 8; ++i)
h[i] = h[i] ^ ConditionalByteReverse(LITTLE_ENDIAN_ORDER, v[i] ^ v[i + 8]);
}
NAMESPACE_END

180
blake2.h
View File

@ -65,29 +65,46 @@ struct CRYPTOPP_NO_VTABLE BLAKE2s_ParameterBlock
BLAKE2s_ParameterBlock()
{
memset(this, 0x00, sizeof(*this));
digestLength = DIGESTSIZE;
fanout = depth = 1;
Reset();
}
BLAKE2s_ParameterBlock(size_t digestSize)
{
CRYPTOPP_ASSERT(digestSize <= DIGESTSIZE);
memset(this, 0x00, sizeof(*this));
digestLength = (byte)digestSize;
fanout = depth = 1;
Reset(digestSize);
}
BLAKE2s_ParameterBlock(size_t digestSize, size_t keyLength, const byte* salt, size_t saltLength,
const byte* personalization, size_t personalizationLength);
byte digestLength;
byte keyLength, fanout, depth;
byte leafLength[4];
byte nodeOffset[6];
byte nodeDepth, innerLength;
byte salt[SALTSIZE];
byte personalization[PERSONALIZATIONSIZE];
void Reset(size_t digestLength=DIGESTSIZE, size_t keyLength=0);
byte* data() {
return m_data.data();
}
const byte* data() const {
return m_data.data();
}
size_t size() const {
return m_data.size();
}
byte* salt() {
return m_data + SaltOff;
}
byte* personalization() {
return m_data + PersonalizationOff;
}
// Offsets into the byte array
enum {
DigestOff = 0, KeyOff = 1, FanoutOff = 2, DepthOff = 3, LeafOff = 4, NodeOff = 8,
NodeDepthOff = 14, InnerOff = 15, SaltOff = 16, PersonalizationOff = 24
};
FixedSizeAlignedSecBlock<byte, 32, true> m_data;
};
/// \brief BLAKE2b parameter block
@ -99,65 +116,112 @@ struct CRYPTOPP_NO_VTABLE BLAKE2b_ParameterBlock
BLAKE2b_ParameterBlock()
{
memset(this, 0x00, sizeof(*this));
digestLength = DIGESTSIZE;
fanout = depth = 1;
Reset();
}
BLAKE2b_ParameterBlock(size_t digestSize)
{
CRYPTOPP_ASSERT(digestSize <= DIGESTSIZE);
memset(this, 0x00, sizeof(*this));
digestLength = (byte)digestSize;
fanout = depth = 1;
Reset(digestSize);
}
BLAKE2b_ParameterBlock(size_t digestSize, size_t keyLength, const byte* salt, size_t saltLength,
const byte* personalization, size_t personalizationLength);
byte digestLength;
byte keyLength, fanout, depth;
byte leafLength[4];
byte nodeOffset[8];
byte nodeDepth, innerLength, rfu[14];
byte salt[SALTSIZE];
byte personalization[PERSONALIZATIONSIZE];
void Reset(size_t digestLength=DIGESTSIZE, size_t keyLength=0);
byte* data() {
return m_data.data();
}
const byte* data() const {
return m_data.data();
}
size_t size() const {
return m_data.size();
}
byte* salt() {
return m_data + SaltOff;
}
byte* personalization() {
return m_data + PersonalizationOff;
}
// Offsets into the byte array
enum {
DigestOff = 0, KeyOff = 1, FanoutOff = 2, DepthOff = 3, LeafOff = 4, NodeOff = 8,
NodeDepthOff = 16, InnerOff = 17, RfuOff = 18, SaltOff = 32, PersonalizationOff = 48
};
FixedSizeAlignedSecBlock<byte, 64, true> m_data;
};
/// \brief BLAKE2s state information
/// \since Crypto++ 5.6.4
struct CRYPTOPP_NO_VTABLE BLAKE2s_State
{
BLAKE2s_State()
{
// Set all members except scratch buffer[]
h[0]=h[1]=h[2]=h[3]=h[4]=h[5]=h[6]=h[7] = 0;
tf[0]=tf[1]=tf[2]=tf[3] = 0;
length = 0;
BLAKE2s_State() {
Reset();
}
void Reset();
inline word32* h() {
return m_hft.data();
}
inline word32* t() {
return m_hft.data() + 8;
}
inline word32* f() {
return m_hft.data() + 10;
}
inline byte* data() {
return m_buf.data();
}
// SSE4, Power7 and NEON depend upon t[] and f[] being side-by-side
word32 h[8], tf[4]; // t[2], f[2];
byte buffer[BLAKE2s_Info::BLOCKSIZE];
size_t length;
CRYPTOPP_CONSTANT(BLOCKSIZE = BLAKE2s_Info::BLOCKSIZE);
FixedSizeAlignedSecBlock<word32, 8+2+2, true> m_hft;
FixedSizeAlignedSecBlock<byte, BLOCKSIZE, true> m_buf;
size_t m_len;
};
/// \brief BLAKE2b state information
/// \since Crypto++ 5.6.4
struct CRYPTOPP_NO_VTABLE BLAKE2b_State
{
BLAKE2b_State()
{
// Set all members except scratch buffer[]
h[0]=h[1]=h[2]=h[3]=h[4]=h[5]=h[6]=h[7] = 0;
tf[0]=tf[1]=tf[2]=tf[3] = 0;
length = 0;
BLAKE2b_State() {
Reset();
}
void Reset();
inline word64* h() {
return m_hft.data();
}
inline word64* t() {
return m_hft.data() + 8;
}
inline word64* f() {
return m_hft.data() + 10;
}
inline byte* data() {
return m_buf.data();
}
// SSE4, Power8 and NEON depend upon t[] and f[] being side-by-side
word64 h[8], tf[4]; // t[2], f[2];
byte buffer[BLAKE2b_Info::BLOCKSIZE];
size_t length;
CRYPTOPP_CONSTANT(BLOCKSIZE = BLAKE2b_Info::BLOCKSIZE);
FixedSizeAlignedSecBlock<word64, 8+2+2, true> m_hft;
FixedSizeAlignedSecBlock<byte, BLOCKSIZE, true> m_buf;
size_t m_len;
};
/// \brief The BLAKE2s cryptographic hash function
@ -184,8 +248,6 @@ public:
typedef BLAKE2s_State State;
typedef BLAKE2s_ParameterBlock ParameterBlock;
typedef SecBlock<State, AllocatorWithCleanup<State, true> > AlignedState;
typedef SecBlock<ParameterBlock, AllocatorWithCleanup<ParameterBlock, true> > AlignedParameterBlock;
CRYPTOPP_STATIC_CONSTEXPR const char* StaticAlgorithmName() {return "BLAKE2s";}
@ -214,10 +276,10 @@ public:
/// \details Object algorithm name follows the naming described in
/// <A HREF="http://tools.ietf.org/html/rfc7693#section-4">RFC 7693, The BLAKE2 Cryptographic Hash and
/// Message Authentication Code (MAC)</A>. For example, "BLAKE2b-512" and "BLAKE2s-256".
std::string AlgorithmName() const {return std::string(BLAKE2s_Info::StaticAlgorithmName()) + "-" + IntToString(this->DigestSize()*8);}
std::string AlgorithmName() const {return std::string(BLAKE2s_Info::StaticAlgorithmName()) + "-" + IntToString(DigestSize()*8);}
unsigned int DigestSize() const {return m_digestSize;}
unsigned int OptimalDataAlignment() const {return (CRYPTOPP_BOOL_ALIGN16 ? 16 : GetAlignmentOf<word32>());}
unsigned int OptimalDataAlignment() const;
void Update(const byte *input, size_t length);
void Restart();
@ -253,10 +315,10 @@ protected:
void UncheckedSetKey(const byte* key, unsigned int length, const CryptoPP::NameValuePairs& params);
private:
AlignedState m_state;
AlignedParameterBlock m_block;
State m_state;
ParameterBlock m_block;
AlignedSecByteBlock m_key;
word32 m_digestSize;
word32 m_digestSize, m_keyLength;
bool m_treeMode;
};
@ -284,8 +346,6 @@ public:
typedef BLAKE2b_State State;
typedef BLAKE2b_ParameterBlock ParameterBlock;
typedef SecBlock<State, AllocatorWithCleanup<State, true> > AlignedState;
typedef SecBlock<ParameterBlock, AllocatorWithCleanup<ParameterBlock, true> > AlignedParameterBlock;
CRYPTOPP_STATIC_CONSTEXPR const char* StaticAlgorithmName() {return "BLAKE2b";}
@ -314,10 +374,10 @@ public:
/// \details Object algorithm name follows the naming described in
/// <A HREF="http://tools.ietf.org/html/rfc7693#section-4">RFC 7693, The BLAKE2 Cryptographic Hash and
/// Message Authentication Code (MAC)</A>. For example, "BLAKE2b-512" and "BLAKE2s-256".
std::string AlgorithmName() const {return std::string(BLAKE2b_Info::StaticAlgorithmName()) + "-" + IntToString(this->DigestSize()*8);}
std::string AlgorithmName() const {return std::string(BLAKE2b_Info::StaticAlgorithmName()) + "-" + IntToString(DigestSize()*8);}
unsigned int DigestSize() const {return m_digestSize;}
unsigned int OptimalDataAlignment() const {return (CRYPTOPP_BOOL_ALIGN16 ? 16 : GetAlignmentOf<word64>());}
unsigned int OptimalDataAlignment() const;
void Update(const byte *input, size_t length);
void Restart();
@ -354,10 +414,10 @@ protected:
void UncheckedSetKey(const byte* key, unsigned int length, const CryptoPP::NameValuePairs& params);
private:
AlignedState m_state;
AlignedParameterBlock m_block;
State m_state;
ParameterBlock m_block;
AlignedSecByteBlock m_key;
word32 m_digestSize;
word32 m_digestSize, m_keyLength;
bool m_treeMode;
};

View File

@ -45,6 +45,9 @@
# include "ppc_simd.h"
#endif
// Squash MS LNK4221 and libtool warnings
extern const char BLAKE2B_SIMD_FNAME[] = __FILE__;
NAMESPACE_BEGIN(CryptoPP)
// Exported by blake2.cpp
@ -451,14 +454,14 @@ void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2b_State& state)
const __m128i m6 = LOADU(input + 96);
const __m128i m7 = LOADU(input + 112);
row1l = LOADU(&state.h[0]);
row1h = LOADU(&state.h[2]);
row2l = LOADU(&state.h[4]);
row2h = LOADU(&state.h[6]);
row3l = LOADU(&BLAKE2B_IV[0]);
row3h = LOADU(&BLAKE2B_IV[2]);
row4l = _mm_xor_si128(LOADU(&BLAKE2B_IV[4]), LOADU(&state.tf[0]));
row4h = _mm_xor_si128(LOADU(&BLAKE2B_IV[6]), LOADU(&state.tf[2]));
row1l = LOADU(state.h()+0);
row1h = LOADU(state.h()+2);
row2l = LOADU(state.h()+4);
row2h = LOADU(state.h()+6);
row3l = LOADU(BLAKE2B_IV+0);
row3h = LOADU(BLAKE2B_IV+2);
row4l = _mm_xor_si128(LOADU(BLAKE2B_IV+4), LOADU(state.t()+0));
row4h = _mm_xor_si128(LOADU(BLAKE2B_IV+6), LOADU(state.f()+0));
BLAKE2B_ROUND(0);
BLAKE2B_ROUND(1);
@ -475,12 +478,12 @@ void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2b_State& state)
row1l = _mm_xor_si128(row3l, row1l);
row1h = _mm_xor_si128(row3h, row1h);
STOREU(&state.h[0], _mm_xor_si128(LOADU(&state.h[0]), row1l));
STOREU(&state.h[2], _mm_xor_si128(LOADU(&state.h[2]), row1h));
STOREU(state.h()+0, _mm_xor_si128(LOADU(state.h()+0), row1l));
STOREU(state.h()+2, _mm_xor_si128(LOADU(state.h()+2), row1h));
row2l = _mm_xor_si128(row4l, row2l);
row2h = _mm_xor_si128(row4h, row2h);
STOREU(&state.h[4], _mm_xor_si128(LOADU(&state.h[4]), row2l));
STOREU(&state.h[6], _mm_xor_si128(LOADU(&state.h[6]), row2h));
STOREU(state.h()+4, _mm_xor_si128(LOADU(state.h()+4), row2l));
STOREU(state.h()+6, _mm_xor_si128(LOADU(state.h()+6), row2h));
}
#endif // CRYPTOPP_SSE41_AVAILABLE
@ -710,15 +713,15 @@ void BLAKE2_Compress64_NEON(const byte* input, BLAKE2b_State& state)
uint64x2_t row1l, row1h, row2l, row2h;
uint64x2_t row3l, row3h, row4l, row4h;
const uint64x2_t h0 = row1l = vld1q_u64(&state.h[0]);
const uint64x2_t h1 = row1h = vld1q_u64(&state.h[2]);
const uint64x2_t h2 = row2l = vld1q_u64(&state.h[4]);
const uint64x2_t h3 = row2h = vld1q_u64(&state.h[6]);
const uint64x2_t h0 = row1l = vld1q_u64(state.h()+0);
const uint64x2_t h1 = row1h = vld1q_u64(state.h()+2);
const uint64x2_t h2 = row2l = vld1q_u64(state.h()+4);
const uint64x2_t h3 = row2h = vld1q_u64(state.h()+6);
row3l = vld1q_u64(&BLAKE2B_IV[0]);
row3h = vld1q_u64(&BLAKE2B_IV[2]);
row4l = veorq_u64(vld1q_u64(&BLAKE2B_IV[4]), vld1q_u64(&state.tf[0]));
row4h = veorq_u64(vld1q_u64(&BLAKE2B_IV[6]), vld1q_u64(&state.tf[2]));
row3l = vld1q_u64(BLAKE2B_IV+0);
row3h = vld1q_u64(BLAKE2B_IV+2);
row4l = veorq_u64(vld1q_u64(BLAKE2B_IV+4), vld1q_u64(state.t()+0));
row4h = veorq_u64(vld1q_u64(BLAKE2B_IV+6), vld1q_u64(state.f()+0));
BLAKE2B_ROUND(0);
BLAKE2B_ROUND(1);
@ -733,10 +736,10 @@ void BLAKE2_Compress64_NEON(const byte* input, BLAKE2b_State& state)
BLAKE2B_ROUND(10);
BLAKE2B_ROUND(11);
vst1q_u64(&state.h[0], veorq_u64(h0, veorq_u64(row1l, row3l)));
vst1q_u64(&state.h[2], veorq_u64(h1, veorq_u64(row1h, row3h)));
vst1q_u64(&state.h[4], veorq_u64(h2, veorq_u64(row2l, row4l)));
vst1q_u64(&state.h[6], veorq_u64(h3, veorq_u64(row2h, row4h)));
vst1q_u64(state.h()+0, veorq_u64(h0, veorq_u64(row1l, row3l)));
vst1q_u64(state.h()+2, veorq_u64(h1, veorq_u64(row1h, row3h)));
vst1q_u64(state.h()+4, veorq_u64(h2, veorq_u64(row2l, row4l)));
vst1q_u64(state.h()+6, veorq_u64(h3, veorq_u64(row2h, row4h)));
}
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
@ -1187,15 +1190,15 @@ void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2b_State& state)
uint64x2_p row1l, row1h, row2l, row2h;
uint64x2_p row3l, row3h, row4l, row4h;
const uint64x2_p h0 = row1l = VecLoad64LE(&state.h[0]);
const uint64x2_p h1 = row1h = VecLoad64LE(&state.h[2]);
const uint64x2_p h2 = row2l = VecLoad64LE(&state.h[4]);
const uint64x2_p h3 = row2h = VecLoad64LE(&state.h[6]);
const uint64x2_p h0 = row1l = VecLoad64LE(state.h()+0);
const uint64x2_p h1 = row1h = VecLoad64LE(state.h()+2);
const uint64x2_p h2 = row2l = VecLoad64LE(state.h()+4);
const uint64x2_p h3 = row2h = VecLoad64LE(state.h()+6);
row3l = VecLoad64(&BLAKE2B_IV[0]);
row3h = VecLoad64(&BLAKE2B_IV[2]);
row4l = VecXor(VecLoad64(&BLAKE2B_IV[4]), VecLoad64(&state.tf[0]));
row4h = VecXor(VecLoad64(&BLAKE2B_IV[6]), VecLoad64(&state.tf[2]));
row3l = VecLoad64(BLAKE2B_IV+0);
row3h = VecLoad64(BLAKE2B_IV+2);
row4l = VecXor(VecLoad64(BLAKE2B_IV+4), VecLoad64(state.t()+0));
row4h = VecXor(VecLoad64(BLAKE2B_IV+6), VecLoad64(state.f()+0));
BLAKE2B_ROUND(0);
BLAKE2B_ROUND(1);
@ -1210,10 +1213,10 @@ void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2b_State& state)
BLAKE2B_ROUND(10);
BLAKE2B_ROUND(11);
VecStore64LE(&state.h[0], VecXor(h0, VecXor(row1l, row3l)));
VecStore64LE(&state.h[2], VecXor(h1, VecXor(row1h, row3h)));
VecStore64LE(&state.h[4], VecXor(h2, VecXor(row2l, row4l)));
VecStore64LE(&state.h[6], VecXor(h3, VecXor(row2h, row4h)));
VecStore64LE(state.h()+0, VecXor(h0, VecXor(row1l, row3l)));
VecStore64LE(state.h()+2, VecXor(h1, VecXor(row1h, row3h)));
VecStore64LE(state.h()+4, VecXor(h2, VecXor(row2l, row4l)));
VecStore64LE(state.h()+6, VecXor(h3, VecXor(row2h, row4h)));
}
#endif // CRYPTOPP_POWER8_AVAILABLE

View File

@ -55,6 +55,9 @@
# include "ppc_simd.h"
#endif
// Squash MS LNK4221 and libtool warnings
extern const char BLAKE2S_SIMD_FNAME[] = __FILE__;
NAMESPACE_BEGIN(CryptoPP)
// Exported by blake2.cpp
@ -342,10 +345,10 @@ void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2s_State& state)
const __m128i m2 = LOADU(input + 32);
const __m128i m3 = LOADU(input + 48);
row1 = ff0 = LOADU(&state.h[0]);
row2 = ff1 = LOADU(&state.h[4]);
row3 = LOADU(&BLAKE2S_IV[0]);
row4 = _mm_xor_si128(LOADU(&BLAKE2S_IV[4]), LOADU(&state.tf[0]));
row1 = ff0 = LOADU(state.h()+0);
row2 = ff1 = LOADU(state.h()+4);
row3 = LOADU(BLAKE2S_IV+0);
row4 = _mm_xor_si128(LOADU(&BLAKE2S_IV[4]), LOADU(state.t()+0));
BLAKE2S_ROUND(0);
BLAKE2S_ROUND(1);
@ -358,8 +361,8 @@ void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2s_State& state)
BLAKE2S_ROUND(8);
BLAKE2S_ROUND(9);
STOREU(&state.h[0], _mm_xor_si128(ff0, _mm_xor_si128(row1, row3)));
STOREU(&state.h[4], _mm_xor_si128(ff1, _mm_xor_si128(row2, row4)));
STOREU(state.h()+0, _mm_xor_si128(ff0, _mm_xor_si128(row1, row3)));
STOREU(state.h()+4, _mm_xor_si128(ff1, _mm_xor_si128(row2, row4)));
}
#endif // CRYPTOPP_SSE41_AVAILABLE
@ -660,10 +663,10 @@ void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state)
uint32x4_t row1, row2, row3, row4;
const uint32x4_t f0 = row1 = vld1q_u32(&state.h[0]);
const uint32x4_t f1 = row2 = vld1q_u32(&state.h[4]);
row3 = vld1q_u32(&BLAKE2S_IV[0]);
row4 = veorq_u32(vld1q_u32(&BLAKE2S_IV[4]), vld1q_u32(&state.tf[0]));
const uint32x4_t f0 = row1 = vld1q_u32(state.h()+0);
const uint32x4_t f1 = row2 = vld1q_u32(state.h()+4);
row3 = vld1q_u32(BLAKE2S_IV+0);
row4 = veorq_u32(vld1q_u32(&BLAKE2S_IV[4]), vld1q_u32(state.t()+0));
BLAKE2S_ROUND(0);
BLAKE2S_ROUND(1);
@ -676,8 +679,8 @@ void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state)
BLAKE2S_ROUND(8);
BLAKE2S_ROUND(9);
vst1q_u32(&state.h[0], veorq_u32(f0, veorq_u32(row1, row3)));
vst1q_u32(&state.h[4], veorq_u32(f1, veorq_u32(row2, row4)));
vst1q_u32(state.h()+0, veorq_u32(f0, veorq_u32(row1, row3)));
vst1q_u32(state.h()+4, veorq_u32(f1, veorq_u32(row2, row4)));
}
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
@ -983,10 +986,10 @@ void BLAKE2_Compress32_CORE(const byte* input, BLAKE2s_State& state)
const uint32x4_p m8 = VecLoad32LE(input + 32);
const uint32x4_p m12 = VecLoad32LE(input + 48);
row1 = ff0 = VecLoad32LE(&state.h[0]);
row2 = ff1 = VecLoad32LE(&state.h[4]);
row3 = VecLoad32(&BLAKE2S_IV[0]);
row4 = VecXor(VecLoad32(&BLAKE2S_IV[4]), VecLoad32(&state.tf[0]));
row1 = ff0 = VecLoad32LE(state.h()+0);
row2 = ff1 = VecLoad32LE(state.h()+4);
row3 = VecLoad32(BLAKE2S_IV+0);
row4 = VecXor(VecLoad32(&BLAKE2S_IV[4]), VecLoad32(state.t()+0));
BLAKE2S_ROUND(0);
BLAKE2S_ROUND(1);
@ -999,8 +1002,8 @@ void BLAKE2_Compress32_CORE(const byte* input, BLAKE2s_State& state)
BLAKE2S_ROUND(8);
BLAKE2S_ROUND(9);
VecStore32LE(&state.h[0], VecXor(ff0, VecXor(row1, row3)));
VecStore32LE(&state.h[4], VecXor(ff1, VecXor(row2, row4)));
VecStore32LE(state.h()+0, VecXor(ff0, VecXor(row1, row3)));
VecStore32LE(state.h()+4, VecXor(ff1, VecXor(row2, row4)));
}
#endif // CRYPTOPP_POWER7_AVAILABLE || CRYPTOPP_ALTIVEC_AVAILABLE