diff --git a/lsh256.cpp b/lsh256.cpp
index 07814100..47ab9d4c 100644
--- a/lsh256.cpp
+++ b/lsh256.cpp
@@ -863,6 +863,23 @@ inline void load_iv(word32* cv_l, word32* cv_r, const word32* iv)
 #endif
 }
 
+inline void zero_iv(lsh_u32* cv_l, lsh_u32* cv_r)
+{
+#if defined(CRYPTOPP_LSH256_AVX_AVAILABLE)
+	_mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_setzero_si256());
+	_mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_setzero_si256());
+
+#elif defined(CRYPTOPP_LSH256_SSE2_AVAILABLE)
+	_mm_storeu_si128(M128_CAST(cv_l+0), _mm_setzero_si128());
+	_mm_storeu_si128(M128_CAST(cv_l+4), _mm_setzero_si128());
+	_mm_storeu_si128(M128_CAST(cv_r+0), _mm_setzero_si128());
+	_mm_storeu_si128(M128_CAST(cv_r+4), _mm_setzero_si128());
+#else
+	memset(cv_l, 0x00, 8*sizeof(lsh_u32));
+	memset(cv_r, 0x00, 8*sizeof(lsh_u32));
+#endif
+}
+
 inline void zero_submsgs(LSH256_Context* ctx)
 {
 	lsh_u32* sub_msgs = ctx->sub_msgs;
@@ -980,16 +997,14 @@ lsh_err lsh256_init(LSH256_Context* ctx)
 	lsh_u32* cv_l = ctx->cv_l;
 	lsh_u32* cv_r = ctx->cv_r;
 
-	memset(ctx->cv_l, 0, 8 * sizeof(lsh_u32));
-	memset(ctx->cv_r, 0, 8 * sizeof(lsh_u32));
-
-	ctx->cv_l[0] = LSH256_HASH_VAL_MAX_BYTE_LEN;
-	ctx->cv_l[1] = LSH_GET_HASHBIT(algtype);
-
 #if defined(CRYPTOPP_LSH256_AVX_AVAILABLE)
 	AVX_Cleanup cleanup;
 #endif
 
+	zero_iv(cv_l, cv_r);
+	cv_l[0] = LSH256_HASH_VAL_MAX_BYTE_LEN;
+	cv_l[1] = LSH_GET_HASHBIT(algtype);
+
 	for (size_t i = 0; i < NUM_STEPS / 2; i++)
 	{
 		//Mix
diff --git a/lsh512.cpp b/lsh512.cpp
index b26c5718..30b1c8d2 100644
--- a/lsh512.cpp
+++ b/lsh512.cpp
@@ -1128,6 +1128,29 @@ inline void load_iv(word64* cv_l, word64* cv_r, const word64* iv)
 #endif
 }
 
+inline void zero_iv(lsh_u64* cv_l, lsh_u64* cv_r)
+{
+#if defined(CRYPTOPP_LSH512_AVX_AVAILABLE)
+	_mm256_storeu_si256(M256_CAST(cv_l+0), _mm256_setzero_si256());
+	_mm256_storeu_si256(M256_CAST(cv_l+4), _mm256_setzero_si256());
+	_mm256_storeu_si256(M256_CAST(cv_r+0), _mm256_setzero_si256());
+	_mm256_storeu_si256(M256_CAST(cv_r+4), _mm256_setzero_si256());
+
+#elif defined(CRYPTOPP_LSH512_SSE2_AVAILABLE)
+	_mm_storeu_si128(M128_CAST(cv_l+0), _mm_setzero_si128());
+	_mm_storeu_si128(M128_CAST(cv_l+2), _mm_setzero_si128());
+	_mm_storeu_si128(M128_CAST(cv_l+4), _mm_setzero_si128());
+	_mm_storeu_si128(M128_CAST(cv_l+6), _mm_setzero_si128());
+	_mm_storeu_si128(M128_CAST(cv_r+0), _mm_setzero_si128());
+	_mm_storeu_si128(M128_CAST(cv_r+2), _mm_setzero_si128());
+	_mm_storeu_si128(M128_CAST(cv_r+4), _mm_setzero_si128());
+	_mm_storeu_si128(M128_CAST(cv_r+6), _mm_setzero_si128());
+#else
+	memset(cv_l, 0, 8*sizeof(lsh_u64));
+	memset(cv_r, 0, 8*sizeof(lsh_u64));
+#endif
+}
+
 inline void zero_submsgs(LSH512_Context* ctx)
 {
 	lsh_u64* sub_msgs = ctx->sub_msgs;
@@ -1300,16 +1323,14 @@ lsh_err lsh512_init(LSH512_Context* ctx)
 	lsh_u64* cv_l = ctx->cv_l;
 	lsh_u64* cv_r = ctx->cv_r;
 
-	memset(cv_l, 0, 8 * sizeof(lsh_u64));
-	memset(cv_r, 0, 8 * sizeof(lsh_u64));
-
-	cv_l[0] = LSH512_HASH_VAL_MAX_BYTE_LEN;
-	cv_l[1] = LSH_GET_HASHBIT(algtype);
-
 #if defined(CRYPTOPP_LSH512_AVX_AVAILABLE)
 	AVX_Cleanup cleanup;
 #endif
 
+	zero_iv(cv_l, cv_r);
+	cv_l[0] = LSH512_HASH_VAL_MAX_BYTE_LEN;
+	cv_l[1] = LSH_GET_HASHBIT(algtype);
+
 	for (size_t i = 0; i < NUM_STEPS / 2; i++)
 	{
 		//Mix