Fix disjoint t[] and f[] when using SIMD implementations

This commit is contained in:
Jeffrey Walton 2018-11-03 10:49:22 -04:00
parent 600e2a8be4
commit bdeaae3ac9
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
4 changed files with 37 additions and 37 deletions

View File

@ -455,12 +455,12 @@ void BLAKE2s::Restart(const BLAKE2s_ParameterBlock& block, const word32 counter[
}
State& state = *m_state.data();
state.t[0] = state.t[1] = 0, state.f[0] = state.f[1] = 0, state.length = 0;
state.tf[0] = state.tf[1] = 0, state.tf[2] = state.tf[3] = 0, state.length = 0;
if (counter != NULLPTR)
{
state.t[0] = counter[0];
state.t[1] = counter[1];
state.tf[0] = counter[0];
state.tf[1] = counter[1];
}
const word32* iv = BLAKE2S_IV;
@ -486,12 +486,12 @@ void BLAKE2b::Restart(const BLAKE2b_ParameterBlock& block, const word64 counter[
}
State& state = *m_state.data();
state.t[0] = state.t[1] = 0, state.f[0] = state.f[1] = 0, state.length = 0;
state.tf[0] = state.tf[1] = 0, state.tf[2] = state.tf[3] = 0, state.length = 0;
if (counter != NULLPTR)
{
state.t[0] = counter[0];
state.t[1] = counter[1];
state.tf[0] = counter[0];
state.tf[1] = counter[1];
}
const word64* iv = BLAKE2B_IV;
@ -584,11 +584,11 @@ void BLAKE2s::TruncatedFinal(byte *hash, size_t size)
// Set last block unconditionally
State& state = *m_state.data();
state.f[0] = ~static_cast<word32>(0);
state.tf[2] = ~static_cast<word32>(0);
// Set last node if tree mode
if (m_treeMode)
state.f[1] = ~static_cast<word32>(0);
state.tf[3] = ~static_cast<word32>(0);
// Increment counter for tail bytes only
IncrementCounter(state.length);
@ -609,11 +609,11 @@ void BLAKE2b::TruncatedFinal(byte *hash, size_t size)
// Set last block unconditionally
State& state = *m_state.data();
state.f[0] = ~static_cast<word64>(0);
state.tf[2] = ~static_cast<word64>(0);
// Set last node if tree mode
if (m_treeMode)
state.f[1] = ~static_cast<word64>(0);
state.tf[3] = ~static_cast<word64>(0);
// Increment counter for tail bytes only
IncrementCounter(state.length);
@ -630,15 +630,15 @@ void BLAKE2b::TruncatedFinal(byte *hash, size_t size)
void BLAKE2s::IncrementCounter(size_t count)
{
State& state = *m_state.data();
state.t[0] += static_cast<word32>(count);
state.t[1] += !!(state.t[0] < count);
state.tf[0] += static_cast<word32>(count);
state.tf[1] += !!(state.tf[0] < count);
}
void BLAKE2b::IncrementCounter(size_t count)
{
State& state = *m_state.data();
state.t[0] += static_cast<word64>(count);
state.t[1] += !!(state.t[0] < count);
state.tf[0] += static_cast<word64>(count);
state.tf[1] += !!(state.tf[0] < count);
}
void BLAKE2s::Compress(const byte *input)
@ -702,10 +702,10 @@ void BLAKE2_Compress64_CXX(const byte* input, BLAKE2b_State& state)
v[ 9] = iv[1];
v[10] = iv[2];
v[11] = iv[3];
v[12] = state.t[0] ^ iv[4];
v[13] = state.t[1] ^ iv[5];
v[14] = state.f[0] ^ iv[6];
v[15] = state.f[1] ^ iv[7];
v[12] = state.tf[0] ^ iv[4];
v[13] = state.tf[1] ^ iv[5];
v[14] = state.tf[2] ^ iv[6];
v[15] = state.tf[3] ^ iv[7];
BLAKE2B_ROUND<0>(m, v);
BLAKE2B_ROUND<1>(m, v);
@ -739,10 +739,10 @@ void BLAKE2_Compress32_CXX(const byte* input, BLAKE2s_State& state)
v[ 9] = iv[1];
v[10] = iv[2];
v[11] = iv[3];
v[12] = state.t[0] ^ iv[4];
v[13] = state.t[1] ^ iv[5];
v[14] = state.f[0] ^ iv[6];
v[15] = state.f[1] ^ iv[7];
v[12] = state.tf[0] ^ iv[4];
v[13] = state.tf[1] ^ iv[5];
v[14] = state.tf[2] ^ iv[6];
v[15] = state.tf[3] ^ iv[7];
BLAKE2S_ROUND<0>(m, v);
BLAKE2S_ROUND<1>(m, v);

View File

@ -134,12 +134,12 @@ struct CRYPTOPP_NO_VTABLE BLAKE2s_State
{
// Set all members except scratch buffer[]
h[0]=h[1]=h[2]=h[3]=h[4]=h[5]=h[6]=h[7] = 0;
t[0]=t[1]=f[0]=f[1] = 0;
tf[0]=tf[1]=tf[2]=tf[3] = 0;
length = 0;
}
// SSE2, SSE4 and NEON depend upon t[] and f[] being side-by-side
word32 h[8], t[2], f[2];
// SSE4, Power7 and NEON depend upon t[] and f[] being side-by-side
word32 h[8], tf[4]; // t[2], f[2];
byte buffer[BLAKE2s_Info::BLOCKSIZE];
size_t length;
};
@ -152,12 +152,12 @@ struct CRYPTOPP_NO_VTABLE BLAKE2b_State
{
// Set all members except scratch buffer[]
h[0]=h[1]=h[2]=h[3]=h[4]=h[5]=h[6]=h[7] = 0;
t[0]=t[1]=f[0]=f[1] = 0;
tf[0]=tf[1]=tf[2]=tf[3] = 0;
length = 0;
}
// SSE2, SSE4 and NEON depend upon t[] and f[] being side-by-side
word64 h[8], t[2], f[2];
// SSE4, Power8 and NEON depend upon t[] and f[] being side-by-side
word64 h[8], tf[4]; // t[2], f[2];
byte buffer[BLAKE2b_Info::BLOCKSIZE];
size_t length;
};

View File

@ -457,8 +457,8 @@ void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2b_State& state)
row2h = LOADU( &state.h[6] );
row3l = LOADU( &BLAKE2B_IV[0] );
row3h = LOADU( &BLAKE2B_IV[2] );
row4l = _mm_xor_si128( LOADU( &BLAKE2B_IV[4] ), LOADU( &state.t[0] ) );
row4h = _mm_xor_si128( LOADU( &BLAKE2B_IV[6] ), LOADU( &state.f[0] ) );
row4l = _mm_xor_si128( LOADU( &BLAKE2B_IV[4] ), LOADU( &state.tf[0] ) );
row4h = _mm_xor_si128( LOADU( &BLAKE2B_IV[6] ), LOADU( &state.tf[2] ) );
BLAKE2B_ROUND( 0 );
BLAKE2B_ROUND( 1 );
@ -717,8 +717,8 @@ void BLAKE2_Compress64_NEON(const byte* input, BLAKE2b_State& state)
row3l = vld1q_u64(&BLAKE2B_IV[0]);
row3h = vld1q_u64(&BLAKE2B_IV[2]);
row4l = veorq_u64(vld1q_u64(&BLAKE2B_IV[4]), vld1q_u64(&state.t[0]));
row4h = veorq_u64(vld1q_u64(&BLAKE2B_IV[6]), vld1q_u64(&state.f[0]));
row4l = veorq_u64(vld1q_u64(&BLAKE2B_IV[4]), vld1q_u64(&state.tf[0]));
row4h = veorq_u64(vld1q_u64(&BLAKE2B_IV[6]), vld1q_u64(&state.tf[2]));
BLAKE2B_ROUND(0);
BLAKE2B_ROUND(1);
@ -1194,8 +1194,8 @@ void BLAKE2_Compress64_POWER8(const byte* input, BLAKE2b_State& state)
row3l = VectorLoad64(&BLAKE2B_IV[0]);
row3h = VectorLoad64(&BLAKE2B_IV[2]);
row4l = vec_xor(VectorLoad64(&BLAKE2B_IV[4]), VectorLoad64(&state.t[0]));
row4h = vec_xor(VectorLoad64(&BLAKE2B_IV[6]), VectorLoad64(&state.f[0]));
row4l = vec_xor(VectorLoad64(&BLAKE2B_IV[4]), VectorLoad64(&state.tf[0]));
row4h = vec_xor(VectorLoad64(&BLAKE2B_IV[6]), VectorLoad64(&state.tf[2]));
BLAKE2B_ROUND(0);
BLAKE2B_ROUND(1);

View File

@ -335,7 +335,7 @@ void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2s_State& state)
row1 = ff0 = LOADU( &state.h[0] );
row2 = ff1 = LOADU( &state.h[4] );
row3 = LOADU( &BLAKE2S_IV[0] );
row4 = _mm_xor_si128( LOADU( &BLAKE2S_IV[4] ), LOADU( &state.t[0] ) );
row4 = _mm_xor_si128( LOADU( &BLAKE2S_IV[4] ), LOADU( &state.tf[0] ) );
BLAKE2S_ROUND( 0 );
BLAKE2S_ROUND( 1 );
@ -653,7 +653,7 @@ void BLAKE2_Compress32_NEON(const byte* input, BLAKE2s_State& state)
const uint32x4_t f0 = row1 = vld1q_u32(&state.h[0]);
const uint32x4_t f1 = row2 = vld1q_u32(&state.h[4]);
row3 = vld1q_u32(&BLAKE2S_IV[0]);
row4 = veorq_u32(vld1q_u32(&BLAKE2S_IV[4]), vld1q_u32(&state.t[0]));
row4 = veorq_u32(vld1q_u32(&BLAKE2S_IV[4]), vld1q_u32(&state.tf[0]));
BLAKE2S_ROUND(0);
BLAKE2S_ROUND(1);
@ -1000,7 +1000,7 @@ void BLAKE2_Compress32_POWER7(const byte* input, BLAKE2s_State& state)
row1 = ff0 = VectorLoad32LE( &state.h[0] );
row2 = ff1 = VectorLoad32LE( &state.h[4] );
row3 = VectorLoad32( &BLAKE2S_IV[0] );
row4 = vec_xor( VectorLoad32( &BLAKE2S_IV[4] ), VectorLoad32( &state.t[0] ) );
row4 = vec_xor( VectorLoad32( &BLAKE2S_IV[4] ), VectorLoad32( &state.tf[0] ) );
BLAKE2S_ROUND( 0 );
BLAKE2S_ROUND( 1 );