mirror of
https://github.com/shadps4-emu/ext-cryptopp.git
synced 2024-11-23 09:59:42 +00:00
Rework OperateKeystream (GH #678)
This improves Rabbit performance on a Core i5 6400 from 5.5 cpb to 4.7 cpb
This commit is contained in:
parent
665e16d340
commit
c9c51a5e01
132
rabbit.cpp
132
rabbit.cpp
@ -16,8 +16,6 @@ ANONYMOUS_NAMESPACE_BEGIN
|
||||
|
||||
using CryptoPP::word32;
|
||||
using CryptoPP::rotlConstant;
|
||||
#define ROTL32(x, y) rotlConstant<y>(x)
|
||||
#define U32V(x) static_cast<word32>(x)
|
||||
|
||||
word32 G_func(word32 x)
|
||||
{
|
||||
@ -29,11 +27,11 @@ word32 G_func(word32 x)
|
||||
b = x >> 16;
|
||||
|
||||
/* Calculate high and low result of squaring */
|
||||
h = (((U32V(a*a) >> 17U) + U32V(a*b)) >> 15U) + b*b;
|
||||
h = (((static_cast<word32>(a*a) >> 17U) + static_cast<word32>(a*b)) >> 15U) + b*b;
|
||||
l = x*x;
|
||||
|
||||
/* Return high XOR low */
|
||||
return U32V(h^l);
|
||||
return static_cast<word32>(h^l);
|
||||
}
|
||||
|
||||
word32 NextState(word32 c[8], word32 x[8], word32 carry)
|
||||
@ -46,29 +44,29 @@ word32 NextState(word32 c[8], word32 x[8], word32 carry)
|
||||
c_old[i] = c[i];
|
||||
|
||||
/* Calculate new counter values */
|
||||
c[0] = U32V(c[0] + 0x4D34D34D + carry);
|
||||
c[1] = U32V(c[1] + 0xD34D34D3 + (c[0] < c_old[0]));
|
||||
c[2] = U32V(c[2] + 0x34D34D34 + (c[1] < c_old[1]));
|
||||
c[3] = U32V(c[3] + 0x4D34D34D + (c[2] < c_old[2]));
|
||||
c[4] = U32V(c[4] + 0xD34D34D3 + (c[3] < c_old[3]));
|
||||
c[5] = U32V(c[5] + 0x34D34D34 + (c[4] < c_old[4]));
|
||||
c[6] = U32V(c[6] + 0x4D34D34D + (c[5] < c_old[5]));
|
||||
c[7] = U32V(c[7] + 0xD34D34D3 + (c[6] < c_old[6]));
|
||||
c[0] = static_cast<word32>(c[0] + 0x4D34D34D + carry);
|
||||
c[1] = static_cast<word32>(c[1] + 0xD34D34D3 + (c[0] < c_old[0]));
|
||||
c[2] = static_cast<word32>(c[2] + 0x34D34D34 + (c[1] < c_old[1]));
|
||||
c[3] = static_cast<word32>(c[3] + 0x4D34D34D + (c[2] < c_old[2]));
|
||||
c[4] = static_cast<word32>(c[4] + 0xD34D34D3 + (c[3] < c_old[3]));
|
||||
c[5] = static_cast<word32>(c[5] + 0x34D34D34 + (c[4] < c_old[4]));
|
||||
c[6] = static_cast<word32>(c[6] + 0x4D34D34D + (c[5] < c_old[5]));
|
||||
c[7] = static_cast<word32>(c[7] + 0xD34D34D3 + (c[6] < c_old[6]));
|
||||
carry = (c[7] < c_old[7]);
|
||||
|
||||
/* Calculate the g-values */
|
||||
for (i = 0; i<8; i++)
|
||||
g[i] = G_func(U32V(x[i] + c[i]));
|
||||
g[i] = G_func(static_cast<word32>(x[i] + c[i]));
|
||||
|
||||
/* Calculate new state values */
|
||||
x[0] = U32V(g[0] + ROTL32(g[7], 16) + ROTL32(g[6], 16));
|
||||
x[1] = U32V(g[1] + ROTL32(g[0], 8) + g[7]);
|
||||
x[2] = U32V(g[2] + ROTL32(g[1], 16) + ROTL32(g[0], 16));
|
||||
x[3] = U32V(g[3] + ROTL32(g[2], 8) + g[1]);
|
||||
x[4] = U32V(g[4] + ROTL32(g[3], 16) + ROTL32(g[2], 16));
|
||||
x[5] = U32V(g[5] + ROTL32(g[4], 8) + g[3]);
|
||||
x[6] = U32V(g[6] + ROTL32(g[5], 16) + ROTL32(g[4], 16));
|
||||
x[7] = U32V(g[7] + ROTL32(g[6], 8) + g[5]);
|
||||
x[0] = static_cast<word32>(g[0] + rotlConstant<16>(g[7]) + rotlConstant<16>(g[6]));
|
||||
x[1] = static_cast<word32>(g[1] + rotlConstant<8>(g[0]) + g[7]);
|
||||
x[2] = static_cast<word32>(g[2] + rotlConstant<16>(g[1]) + rotlConstant<16>(g[0]));
|
||||
x[3] = static_cast<word32>(g[3] + rotlConstant<8>(g[2]) + g[1]);
|
||||
x[4] = static_cast<word32>(g[4] + rotlConstant<16>(g[3]) + rotlConstant<16>(g[2]));
|
||||
x[5] = static_cast<word32>(g[5] + rotlConstant<8>(g[4]) + g[3]);
|
||||
x[6] = static_cast<word32>(g[6] + rotlConstant<16>(g[5]) + rotlConstant<16>(g[4]));
|
||||
x[7] = static_cast<word32>(g[7] + rotlConstant<8>(g[6]) + g[5]);
|
||||
|
||||
return carry;
|
||||
}
|
||||
@ -88,16 +86,16 @@ void RabbitPolicy::CipherSetKey(const NameValuePairs &params, const byte *userKe
|
||||
m_mx[2] = m_t[1];
|
||||
m_mx[4] = m_t[2];
|
||||
m_mx[6] = m_t[3];
|
||||
m_mx[1] = U32V(m_t[3] << 16) | (m_t[2] >> 16);
|
||||
m_mx[3] = U32V(m_t[0] << 16) | (m_t[3] >> 16);
|
||||
m_mx[5] = U32V(m_t[1] << 16) | (m_t[0] >> 16);
|
||||
m_mx[7] = U32V(m_t[2] << 16) | (m_t[1] >> 16);
|
||||
m_mx[1] = static_cast<word32>(m_t[3] << 16) | (m_t[2] >> 16);
|
||||
m_mx[3] = static_cast<word32>(m_t[0] << 16) | (m_t[3] >> 16);
|
||||
m_mx[5] = static_cast<word32>(m_t[1] << 16) | (m_t[0] >> 16);
|
||||
m_mx[7] = static_cast<word32>(m_t[2] << 16) | (m_t[1] >> 16);
|
||||
|
||||
/* Generate initial counter values */
|
||||
m_mc[0] = ROTL32(m_t[2], 16);
|
||||
m_mc[2] = ROTL32(m_t[3], 16);
|
||||
m_mc[4] = ROTL32(m_t[0], 16);
|
||||
m_mc[6] = ROTL32(m_t[1], 16);
|
||||
m_mc[0] = rotlConstant<16>(m_t[2]);
|
||||
m_mc[2] = rotlConstant<16>(m_t[3]);
|
||||
m_mc[4] = rotlConstant<16>(m_t[0]);
|
||||
m_mc[6] = rotlConstant<16>(m_t[1]);
|
||||
m_mc[1] = (m_t[0] & 0xFFFF0000) | (m_t[1] & 0xFFFF);
|
||||
m_mc[3] = (m_t[1] & 0xFFFF0000) | (m_t[2] & 0xFFFF);
|
||||
m_mc[5] = (m_t[2] & 0xFFFF0000) | (m_t[3] & 0xFFFF);
|
||||
@ -125,30 +123,24 @@ void RabbitPolicy::CipherSetKey(const NameValuePairs &params, const byte *userKe
|
||||
|
||||
void RabbitPolicy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
|
||||
{
|
||||
// Rabbit's bufferByteSize in AdditiveCipherTemplate
|
||||
const unsigned int BUFFER_SIZE = 16;
|
||||
|
||||
for (unsigned int i = 0; i<iterationCount; ++i)
|
||||
byte* out = output;
|
||||
for (unsigned int i = 0; i<iterationCount; ++i, out += 16)
|
||||
{
|
||||
/* Iterate the system */
|
||||
m_wcy = NextState(m_wc, m_wx, m_wcy);
|
||||
|
||||
/* Encrypt/decrypt 16 bytes of data */
|
||||
PutWord(false, LITTLE_ENDIAN_ORDER, output + 0, m_wx[0] ^ (m_wx[5] >> 16) ^ (m_wx[3] << 16));
|
||||
PutWord(false, LITTLE_ENDIAN_ORDER, output + 4, m_wx[2] ^ (m_wx[7] >> 16) ^ (m_wx[5] << 16));
|
||||
PutWord(false, LITTLE_ENDIAN_ORDER, output + 8, m_wx[4] ^ (m_wx[1] >> 16) ^ (m_wx[7] << 16));
|
||||
PutWord(false, LITTLE_ENDIAN_ORDER, output + 12, m_wx[6] ^ (m_wx[3] >> 16) ^ (m_wx[1] << 16));
|
||||
|
||||
// If AdditiveCipherTemplate does not have an accumulated keystream
|
||||
// then it will ask OperateKeystream to XOR the plaintext with
|
||||
// the keystream and write it to the ciphertext buffer.
|
||||
if ((operation & INPUT_NULL) != INPUT_NULL)
|
||||
xorbuf(output, input, BUFFER_SIZE);
|
||||
|
||||
/* Increment pointers to input and output data */
|
||||
input += BUFFER_SIZE;
|
||||
output += BUFFER_SIZE;
|
||||
PutWord(false, LITTLE_ENDIAN_ORDER, out + 0, m_wx[0] ^ (m_wx[5] >> 16) ^ (m_wx[3] << 16));
|
||||
PutWord(false, LITTLE_ENDIAN_ORDER, out + 4, m_wx[2] ^ (m_wx[7] >> 16) ^ (m_wx[5] << 16));
|
||||
PutWord(false, LITTLE_ENDIAN_ORDER, out + 8, m_wx[4] ^ (m_wx[1] >> 16) ^ (m_wx[7] << 16));
|
||||
PutWord(false, LITTLE_ENDIAN_ORDER, out + 12, m_wx[6] ^ (m_wx[3] >> 16) ^ (m_wx[1] << 16));
|
||||
}
|
||||
|
||||
// If AdditiveCipherTemplate does not have an accumulated keystream
|
||||
// then it will ask OperateKeystream to XOR the plaintext with
|
||||
// the keystream and write it to the ciphertext buffer.
|
||||
if ((operation & INPUT_NULL) != INPUT_NULL)
|
||||
xorbuf(output, input, GetBytesPerIteration() * iterationCount);
|
||||
}
|
||||
|
||||
void RabbitWithIVPolicy::CipherSetKey(const NameValuePairs &params, const byte *userKey, size_t keylen)
|
||||
@ -162,16 +154,16 @@ void RabbitWithIVPolicy::CipherSetKey(const NameValuePairs &params, const byte *
|
||||
m_mx[2] = m_t[1];
|
||||
m_mx[4] = m_t[2];
|
||||
m_mx[6] = m_t[3];
|
||||
m_mx[1] = U32V(m_t[3] << 16) | (m_t[2] >> 16);
|
||||
m_mx[3] = U32V(m_t[0] << 16) | (m_t[3] >> 16);
|
||||
m_mx[5] = U32V(m_t[1] << 16) | (m_t[0] >> 16);
|
||||
m_mx[7] = U32V(m_t[2] << 16) | (m_t[1] >> 16);
|
||||
m_mx[1] = static_cast<word32>(m_t[3] << 16) | (m_t[2] >> 16);
|
||||
m_mx[3] = static_cast<word32>(m_t[0] << 16) | (m_t[3] >> 16);
|
||||
m_mx[5] = static_cast<word32>(m_t[1] << 16) | (m_t[0] >> 16);
|
||||
m_mx[7] = static_cast<word32>(m_t[2] << 16) | (m_t[1] >> 16);
|
||||
|
||||
/* Generate initial counter values */
|
||||
m_mc[0] = ROTL32(m_t[2], 16);
|
||||
m_mc[2] = ROTL32(m_t[3], 16);
|
||||
m_mc[4] = ROTL32(m_t[0], 16);
|
||||
m_mc[6] = ROTL32(m_t[1], 16);
|
||||
m_mc[0] = rotlConstant<16>(m_t[2]);
|
||||
m_mc[2] = rotlConstant<16>(m_t[3]);
|
||||
m_mc[4] = rotlConstant<16>(m_t[0]);
|
||||
m_mc[6] = rotlConstant<16>(m_t[1]);
|
||||
m_mc[1] = (m_t[0] & 0xFFFF0000) | (m_t[1] & 0xFFFF);
|
||||
m_mc[3] = (m_t[1] & 0xFFFF0000) | (m_t[2] & 0xFFFF);
|
||||
m_mc[5] = (m_t[2] & 0xFFFF0000) | (m_t[3] & 0xFFFF);
|
||||
@ -229,30 +221,24 @@ void RabbitWithIVPolicy::CipherResynchronize(byte *keystreamBuffer, const byte *
|
||||
|
||||
void RabbitWithIVPolicy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
|
||||
{
|
||||
// Rabbit's bufferByteSize in AdditiveCipherTemplate
|
||||
const unsigned int BUFFER_SIZE = 16;
|
||||
|
||||
for (unsigned int i = 0; i<iterationCount; ++i)
|
||||
byte* out = output;
|
||||
for (unsigned int i = 0; i<iterationCount; ++i, out += 16)
|
||||
{
|
||||
/* Iterate the system */
|
||||
m_wcy = NextState(m_wc, m_wx, m_wcy);
|
||||
|
||||
/* Encrypt/decrypt 16 bytes of data */
|
||||
PutWord(false, LITTLE_ENDIAN_ORDER, output + 0, m_wx[0] ^ (m_wx[5] >> 16) ^ (m_wx[3] << 16));
|
||||
PutWord(false, LITTLE_ENDIAN_ORDER, output + 4, m_wx[2] ^ (m_wx[7] >> 16) ^ (m_wx[5] << 16));
|
||||
PutWord(false, LITTLE_ENDIAN_ORDER, output + 8, m_wx[4] ^ (m_wx[1] >> 16) ^ (m_wx[7] << 16));
|
||||
PutWord(false, LITTLE_ENDIAN_ORDER, output + 12, m_wx[6] ^ (m_wx[3] >> 16) ^ (m_wx[1] << 16));
|
||||
|
||||
// If AdditiveCipherTemplate does not have an accumulated keystream
|
||||
// then it will ask OperateKeystream to XOR the plaintext with
|
||||
// the keystream and write it to the ciphertext buffer.
|
||||
if ((operation & INPUT_NULL) != INPUT_NULL)
|
||||
xorbuf(output, input, BUFFER_SIZE);
|
||||
|
||||
/* Increment pointers to input and output data */
|
||||
input += BUFFER_SIZE;
|
||||
output += BUFFER_SIZE;
|
||||
PutWord(false, LITTLE_ENDIAN_ORDER, out + 0, m_wx[0] ^ (m_wx[5] >> 16) ^ (m_wx[3] << 16));
|
||||
PutWord(false, LITTLE_ENDIAN_ORDER, out + 4, m_wx[2] ^ (m_wx[7] >> 16) ^ (m_wx[5] << 16));
|
||||
PutWord(false, LITTLE_ENDIAN_ORDER, out + 8, m_wx[4] ^ (m_wx[1] >> 16) ^ (m_wx[7] << 16));
|
||||
PutWord(false, LITTLE_ENDIAN_ORDER, out + 12, m_wx[6] ^ (m_wx[3] >> 16) ^ (m_wx[1] << 16));
|
||||
}
|
||||
|
||||
// If AdditiveCipherTemplate does not have an accumulated keystream
|
||||
// then it will ask OperateKeystream to XOR the plaintext with
|
||||
// the keystream and write it to the ciphertext buffer.
|
||||
if ((operation & INPUT_NULL) != INPUT_NULL)
|
||||
xorbuf(output, input, GetBytesPerIteration() * iterationCount);
|
||||
}
|
||||
|
||||
NAMESPACE_END
|
||||
|
Loading…
Reference in New Issue
Block a user