ext-cryptopp/shacal2.cpp
2017-08-16 21:43:31 -04:00

233 lines
6.9 KiB
C++

// shacal2.cpp - written by Kevin Springle, 2003
//
// Portions of this code were derived from
// Wei Dai's implementation of SHA-2
//
// Jack Lloyd and the Botan team allowed Crypto++ to use parts of
// Botan's implementation under the same license as Crypto++
// is released. The code for SHACAL2_Enc_ProcessAndXorBlock_SHANI
// below is Botan's x86_encrypt_blocks with minor tweaks. Many thanks
// to the Botan team. Also see http://github.com/randombit/botan/.
//
// The original code and all modifications are in the public domain.
#include "pch.h"
#include "config.h"
#include "shacal2.h"
#include "misc.h"
#include "cpu.h"
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
#include <immintrin.h>
#endif
// Clang __m128i casts
#define M128_CAST(x) ((__m128i *)(void *)(x))
#define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
NAMESPACE_BEGIN(CryptoPP)
ANONYMOUS_NAMESPACE_BEGIN
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
void SHACAL2_Enc_ProcessAndXorBlock_SHANI(const word32* subKeys, const byte *inBlock, const byte *xorBlock, byte *outBlock)
{
CRYPTOPP_ASSERT(subKeys);
CRYPTOPP_ASSERT(inBlock);
CRYPTOPP_ASSERT(outBlock);
const __m128i MASK1 = _mm_set_epi8(8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7);
const __m128i MASK2 = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
__m128i B0 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(inBlock + 0)), MASK1);
__m128i B1 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(inBlock + 16)), MASK2);
__m128i TMP = _mm_alignr_epi8(B0, B1, 8);
B1 = _mm_blend_epi16(B1, B0, 0xF0);
B0 = TMP;
#if 0
// SSE2 + SSSE3, but 0.2 cpb slower on a Celeraon J3455
const __m128i MASK1 = _mm_set_epi8(8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7);
const __m128i MASK2 = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
__m128i B0 = _mm_loadu_si128(CONST_M128_CAST(inBlock + 0));
__m128i B1 = _mm_loadu_si128(CONST_M128_CAST(inBlock + 16));
__m128i TMP = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0, B1), MASK2);
B1 = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0, B1), MASK2);
B0 = TMP;
#endif
const byte* keys = reinterpret_cast<const byte*>(subKeys);
for (size_t i = 0; i != 8; ++i)
{
const __m128i RK0 = _mm_load_si128(CONST_M128_CAST(keys + 32*i));
const __m128i RK2 = _mm_load_si128(CONST_M128_CAST(keys + 32*i+16));
const __m128i RK1 = _mm_srli_si128(RK0, 8);
const __m128i RK3 = _mm_srli_si128(RK2, 8);
B1 = _mm_sha256rnds2_epu32(B1, B0, RK0);
B0 = _mm_sha256rnds2_epu32(B0, B1, RK1);
B1 = _mm_sha256rnds2_epu32(B1, B0, RK2);
B0 = _mm_sha256rnds2_epu32(B0, B1, RK3);
}
TMP = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0, B1), MASK1);
B1 = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0, B1), MASK1);
B0 = TMP;
if (xorBlock)
{
_mm_storeu_si128(M128_CAST(outBlock + 0),
_mm_xor_si128(B0, _mm_loadu_si128(CONST_M128_CAST(xorBlock + 0))));
_mm_storeu_si128(M128_CAST(outBlock + 16),
_mm_xor_si128(B1, _mm_loadu_si128(CONST_M128_CAST(xorBlock + 16))));
}
else
{
_mm_storeu_si128(M128_CAST(outBlock + 0), B0);
_mm_storeu_si128(M128_CAST(outBlock + 16), B1);
}
}
#endif
ANONYMOUS_NAMESPACE_END
// SHACAL-2 function and round definitions
#define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22))
#define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25))
#define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3))
#define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10))
#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) ((x&y)|(z&(x|y)))
/* R is the SHA-256 round function. */
/* This macro increments the k argument as a side effect. */
#define R(a,b,c,d,e,f,g,h,k) \
h+=S1(e)+Ch(e,f,g)+*k++;d+=h;h+=S0(a)+Maj(a,b,c);
/* P is the inverse of the SHA-256 round function. */
/* This macro decrements the k argument as a side effect. */
#define P(a,b,c,d,e,f,g,h,k) \
h-=S0(a)+Maj(a,b,c);d-=h;h-=S1(e)+Ch(e,f,g)+*--k;
void SHACAL2::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, const NameValuePairs &)
{
AssertValidKeyLength(keylen);
word32 *rk = m_key;
unsigned int i;
GetUserKey(BIG_ENDIAN_ORDER, rk, m_key.size(), userKey, keylen);
for (i = 0; i < 48; i++, rk++)
{
rk[16] = rk[0] + s0(rk[1]) + rk[9] + s1(rk[14]);
rk[0] += K[i];
}
for (i = 48; i < 64; i++, rk++)
{
rk[0] += K[i];
}
}
typedef BlockGetAndPut<word32, BigEndian> Block;
void SHACAL2::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
if (HasSHA())
{
SHACAL2_Enc_ProcessAndXorBlock_SHANI(m_key, inBlock, xorBlock, outBlock);
return;
}
#endif
word32 a, b, c, d, e, f, g, h;
const word32 *rk = m_key;
/*
* map byte array block to cipher state:
*/
Block::Get(inBlock)(a)(b)(c)(d)(e)(f)(g)(h);
// Perform SHA-256 transformation.
/* 64 operations, partially loop unrolled */
for (unsigned int j=0; j<64; j+=8)
{
R(a,b,c,d,e,f,g,h,rk);
R(h,a,b,c,d,e,f,g,rk);
R(g,h,a,b,c,d,e,f,rk);
R(f,g,h,a,b,c,d,e,rk);
R(e,f,g,h,a,b,c,d,rk);
R(d,e,f,g,h,a,b,c,rk);
R(c,d,e,f,g,h,a,b,rk);
R(b,c,d,e,f,g,h,a,rk);
}
/*
* map cipher state to byte array block:
*/
Block::Put(xorBlock, outBlock)(a)(b)(c)(d)(e)(f)(g)(h);
}
void SHACAL2::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
word32 a, b, c, d, e, f, g, h;
const word32 *rk = m_key + 64;
/*
* map byte array block to cipher state:
*/
Block::Get(inBlock)(a)(b)(c)(d)(e)(f)(g)(h);
// Perform inverse SHA-256 transformation.
/* 64 operations, partially loop unrolled */
for (unsigned int j=0; j<64; j+=8)
{
P(b,c,d,e,f,g,h,a,rk);
P(c,d,e,f,g,h,a,b,rk);
P(d,e,f,g,h,a,b,c,rk);
P(e,f,g,h,a,b,c,d,rk);
P(f,g,h,a,b,c,d,e,rk);
P(g,h,a,b,c,d,e,f,rk);
P(h,a,b,c,d,e,f,g,rk);
P(a,b,c,d,e,f,g,h,rk);
}
/*
* map cipher state to byte array block:
*/
Block::Put(xorBlock, outBlock)(a)(b)(c)(d)(e)(f)(g)(h);
}
// The SHACAL-2 round constants are identical to the SHA-256 round constants.
const word32 SHACAL2::Base::K[64] =
{
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
NAMESPACE_END