mirror of
https://github.com/shadps4-emu/ext-cryptopp.git
synced 2024-11-23 09:59:42 +00:00
233 lines
6.9 KiB
C++
233 lines
6.9 KiB
C++
// shacal2.cpp - written by Kevin Springle, 2003
//
// Portions of this code were derived from
// Wei Dai's implementation of SHA-2
//
// Jack Lloyd and the Botan team allowed Crypto++ to use parts of
// Botan's implementation under the same license as Crypto++
// is released. The code for SHACAL2_Enc_ProcessAndXorBlock_SHANI
// below is Botan's x86_encrypt_blocks with minor tweaks. Many thanks
// to the Botan team. Also see http://github.com/randombit/botan/.
//
// The original code and all modifications are in the public domain.
|
|
#include "pch.h"
|
|
#include "config.h"
|
|
#include "shacal2.h"
|
|
#include "misc.h"
|
|
#include "cpu.h"
|
|
|
|
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
|
|
#include <immintrin.h>
|
|
#endif
|
|
|
|
// Clang __m128i casts
//
// Convert a byte/word pointer into a pointer to __m128i for the SSE
// load/store intrinsics. The conversion goes through void* (presumably to
// keep Clang from diagnosing the alignment-increasing cast - TODO confirm).
// Named casts are used instead of C-style casts so that M128_CAST can no
// longer silently strip const from its argument; pass const pointers to
// CONST_M128_CAST instead.
#define M128_CAST(x) (static_cast<__m128i *>(static_cast<void *>(x)))
#define CONST_M128_CAST(x) (static_cast<const __m128i *>(static_cast<const void *>(x)))
|
|
|
|
NAMESPACE_BEGIN(CryptoPP)
|
|
|
|
ANONYMOUS_NAMESPACE_BEGIN
|
|
|
|
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
|
|
void SHACAL2_Enc_ProcessAndXorBlock_SHANI(const word32* subKeys, const byte *inBlock, const byte *xorBlock, byte *outBlock)
|
|
{
|
|
CRYPTOPP_ASSERT(subKeys);
|
|
CRYPTOPP_ASSERT(inBlock);
|
|
CRYPTOPP_ASSERT(outBlock);
|
|
|
|
const __m128i MASK1 = _mm_set_epi8(8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7);
|
|
const __m128i MASK2 = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
|
|
|
|
__m128i B0 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(inBlock + 0)), MASK1);
|
|
__m128i B1 = _mm_shuffle_epi8(_mm_loadu_si128(CONST_M128_CAST(inBlock + 16)), MASK2);
|
|
|
|
__m128i TMP = _mm_alignr_epi8(B0, B1, 8);
|
|
B1 = _mm_blend_epi16(B1, B0, 0xF0);
|
|
B0 = TMP;
|
|
|
|
#if 0
|
|
// SSE2 + SSSE3, but 0.2 cpb slower on a Celeraon J3455
|
|
const __m128i MASK1 = _mm_set_epi8(8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7);
|
|
const __m128i MASK2 = _mm_set_epi8(0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15);
|
|
|
|
__m128i B0 = _mm_loadu_si128(CONST_M128_CAST(inBlock + 0));
|
|
__m128i B1 = _mm_loadu_si128(CONST_M128_CAST(inBlock + 16));
|
|
|
|
__m128i TMP = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0, B1), MASK2);
|
|
B1 = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0, B1), MASK2);
|
|
B0 = TMP;
|
|
#endif
|
|
|
|
const byte* keys = reinterpret_cast<const byte*>(subKeys);
|
|
for (size_t i = 0; i != 8; ++i)
|
|
{
|
|
const __m128i RK0 = _mm_load_si128(CONST_M128_CAST(keys + 32*i));
|
|
const __m128i RK2 = _mm_load_si128(CONST_M128_CAST(keys + 32*i+16));
|
|
const __m128i RK1 = _mm_srli_si128(RK0, 8);
|
|
const __m128i RK3 = _mm_srli_si128(RK2, 8);
|
|
|
|
B1 = _mm_sha256rnds2_epu32(B1, B0, RK0);
|
|
B0 = _mm_sha256rnds2_epu32(B0, B1, RK1);
|
|
B1 = _mm_sha256rnds2_epu32(B1, B0, RK2);
|
|
B0 = _mm_sha256rnds2_epu32(B0, B1, RK3);
|
|
}
|
|
|
|
TMP = _mm_shuffle_epi8(_mm_unpackhi_epi64(B0, B1), MASK1);
|
|
B1 = _mm_shuffle_epi8(_mm_unpacklo_epi64(B0, B1), MASK1);
|
|
B0 = TMP;
|
|
|
|
if (xorBlock)
|
|
{
|
|
_mm_storeu_si128(M128_CAST(outBlock + 0),
|
|
_mm_xor_si128(B0, _mm_loadu_si128(CONST_M128_CAST(xorBlock + 0))));
|
|
|
|
_mm_storeu_si128(M128_CAST(outBlock + 16),
|
|
_mm_xor_si128(B1, _mm_loadu_si128(CONST_M128_CAST(xorBlock + 16))));
|
|
}
|
|
else
|
|
{
|
|
_mm_storeu_si128(M128_CAST(outBlock + 0), B0);
|
|
_mm_storeu_si128(M128_CAST(outBlock + 16), B1);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
ANONYMOUS_NAMESPACE_END
|
|
|
|
// SHACAL-2 function and round definitions
//
// All of these are function-like macros. rotrFixed must be visible at the
// point of expansion. Arguments are parenthesized wherever operator
// precedence could otherwise regroup them, but they are still evaluated more
// than once, so do not pass expressions with side effects.

#define S0(x) (rotrFixed((x),2)^rotrFixed((x),13)^rotrFixed((x),22))
#define S1(x) (rotrFixed((x),6)^rotrFixed((x),11)^rotrFixed((x),25))
#define s0(x) (rotrFixed((x),7)^rotrFixed((x),18)^((x)>>3))
#define s1(x) (rotrFixed((x),17)^rotrFixed((x),19)^((x)>>10))

#define Ch(x,y,z) ((z)^((x)&((y)^(z))))
#define Maj(x,y,z) (((x)&(y))|((z)&((x)|(y))))

/* R is the SHA-256 round function. */
/* This macro increments the k argument as a side effect. */
/* It updates d and h in place; k must be a pointer to the next round key. */
/* Wrapped in do/while(0) so that it behaves as a single statement; */
/* the caller supplies the trailing semicolon. */
#define R(a,b,c,d,e,f,g,h,k) \
    do { \
        (h)+=S1(e)+Ch(e,f,g)+*(k)++; \
        (d)+=(h); \
        (h)+=S0(a)+Maj(a,b,c); \
    } while (0)

/* P is the inverse of the SHA-256 round function. */
/* This macro decrements the k argument as a side effect. */
/* It undoes the three steps of R in reverse order, so k must point */
/* one past the round key that R consumed. */
#define P(a,b,c,d,e,f,g,h,k) \
    do { \
        (h)-=S0(a)+Maj(a,b,c); \
        (d)-=(h); \
        (h)-=S1(e)+Ch(e,f,g)+*--(k); \
    } while (0)
|
|
|
|
void SHACAL2::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, const NameValuePairs &)
|
|
{
|
|
AssertValidKeyLength(keylen);
|
|
|
|
word32 *rk = m_key;
|
|
unsigned int i;
|
|
|
|
GetUserKey(BIG_ENDIAN_ORDER, rk, m_key.size(), userKey, keylen);
|
|
for (i = 0; i < 48; i++, rk++)
|
|
{
|
|
rk[16] = rk[0] + s0(rk[1]) + rk[9] + s1(rk[14]);
|
|
rk[0] += K[i];
|
|
}
|
|
for (i = 48; i < 64; i++, rk++)
|
|
{
|
|
rk[0] += K[i];
|
|
}
|
|
}
|
|
|
|
// Helper used by both ProcessAndXorBlock routines below to read the eight
// word32 state words from a byte block (Block::Get) and to write them back,
// with the optional xorBlock applied (Block::Put). Instantiated with the
// BigEndian tag; presumably this selects big-endian word encoding - see
// BlockGetAndPut for the exact semantics.
typedef BlockGetAndPut<word32, BigEndian> Block;
|
|
|
|
void SHACAL2::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
|
|
{
|
|
#if CRYPTOPP_BOOL_SSE_SHA_INTRINSICS_AVAILABLE
|
|
if (HasSHA())
|
|
{
|
|
SHACAL2_Enc_ProcessAndXorBlock_SHANI(m_key, inBlock, xorBlock, outBlock);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
word32 a, b, c, d, e, f, g, h;
|
|
const word32 *rk = m_key;
|
|
|
|
/*
|
|
* map byte array block to cipher state:
|
|
*/
|
|
Block::Get(inBlock)(a)(b)(c)(d)(e)(f)(g)(h);
|
|
|
|
// Perform SHA-256 transformation.
|
|
|
|
/* 64 operations, partially loop unrolled */
|
|
for (unsigned int j=0; j<64; j+=8)
|
|
{
|
|
R(a,b,c,d,e,f,g,h,rk);
|
|
R(h,a,b,c,d,e,f,g,rk);
|
|
R(g,h,a,b,c,d,e,f,rk);
|
|
R(f,g,h,a,b,c,d,e,rk);
|
|
R(e,f,g,h,a,b,c,d,rk);
|
|
R(d,e,f,g,h,a,b,c,rk);
|
|
R(c,d,e,f,g,h,a,b,rk);
|
|
R(b,c,d,e,f,g,h,a,rk);
|
|
}
|
|
|
|
/*
|
|
* map cipher state to byte array block:
|
|
*/
|
|
|
|
Block::Put(xorBlock, outBlock)(a)(b)(c)(d)(e)(f)(g)(h);
|
|
}
|
|
|
|
void SHACAL2::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
|
|
{
|
|
word32 a, b, c, d, e, f, g, h;
|
|
const word32 *rk = m_key + 64;
|
|
|
|
/*
|
|
* map byte array block to cipher state:
|
|
*/
|
|
Block::Get(inBlock)(a)(b)(c)(d)(e)(f)(g)(h);
|
|
|
|
// Perform inverse SHA-256 transformation.
|
|
|
|
/* 64 operations, partially loop unrolled */
|
|
for (unsigned int j=0; j<64; j+=8)
|
|
{
|
|
P(b,c,d,e,f,g,h,a,rk);
|
|
P(c,d,e,f,g,h,a,b,rk);
|
|
P(d,e,f,g,h,a,b,c,rk);
|
|
P(e,f,g,h,a,b,c,d,rk);
|
|
P(f,g,h,a,b,c,d,e,rk);
|
|
P(g,h,a,b,c,d,e,f,rk);
|
|
P(h,a,b,c,d,e,f,g,rk);
|
|
P(a,b,c,d,e,f,g,h,rk);
|
|
}
|
|
|
|
/*
|
|
* map cipher state to byte array block:
|
|
*/
|
|
|
|
Block::Put(xorBlock, outBlock)(a)(b)(c)(d)(e)(f)(g)(h);
|
|
}
|
|
|
|
// The SHACAL-2 round constants are identical to the SHA-256 round constants.
|
|
const word32 SHACAL2::Base::K[64] =
|
|
{
|
|
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
|
|
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
|
|
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
|
|
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
|
|
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
|
|
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
|
|
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
|
|
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
|
|
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
|
|
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
|
|
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
|
|
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
|
|
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
|
|
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
|
|
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
|
|
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
|
|
};
|
|
|
|
NAMESPACE_END
|