Use SecWordBlock for ARIA round keys

This may allow the compiler to optimize ARIA_GSRK. Instead of a memcpy, the memory can be used as a word32.
This commit is contained in:
Jeffrey Walton 2023-09-30 05:47:12 -04:00
parent 9ac6907944
commit d3d23002f6
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
2 changed files with 32 additions and 36 deletions

View File

@ -27,10 +27,6 @@ using CryptoPP::ARIATab::X1;
using CryptoPP::ARIATab::X2; using CryptoPP::ARIATab::X2;
using CryptoPP::ARIATab::KRK; using CryptoPP::ARIATab::KRK;
inline word32* UINT32_CAST(const byte* ptr) {
return reinterpret_cast<word32*>(const_cast<byte*>(ptr));
}
inline byte ARIA_BRF(const word32 x, const int y) { inline byte ARIA_BRF(const word32 x, const int y) {
return static_cast<byte>(GETBYTE(x, y)); return static_cast<byte>(GETBYTE(x, y));
} }
@ -91,22 +87,22 @@ inline void ARIA_FE(word32 t[4]) {
// n-bit right shift of Y XORed to X // n-bit right shift of Y XORed to X
template <unsigned int N> template <unsigned int N>
inline void ARIA_GSRK(const word32 X[4], const word32 Y[4], byte RK[16]) inline void ARIA_GSRK(const word32 X[4], const word32 Y[4], word32 RK[4])
{ {
// MSVC is not generating a "rotate immediate". Constify to help it along. // MSVC is not generating a "rotate immediate". Constify to help it along.
static const unsigned int Q = 4-(N/32); static const unsigned int Q = 4-(N/32);
static const unsigned int R = N % 32; static const unsigned int R = N % 32;
#if (CRYPTOPP_LITTLE_ENDIAN) #if (CRYPTOPP_LITTLE_ENDIAN)
PutWord(false, LITTLE_ENDIAN_ORDER, RK+ 0, (X[0]) ^ ((Y[(Q )%4])>>R) ^ ((Y[(Q+3)%4])<<(32-R))); RK[0] = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, (X[0]) ^ ((Y[(Q )%4])>>R) ^ ((Y[(Q+3)%4])<<(32-R)));
PutWord(false, LITTLE_ENDIAN_ORDER, RK+ 4, (X[1]) ^ ((Y[(Q+1)%4])>>R) ^ ((Y[(Q )%4])<<(32-R))); RK[1] = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, (X[1]) ^ ((Y[(Q+1)%4])>>R) ^ ((Y[(Q )%4])<<(32-R)));
PutWord(false, LITTLE_ENDIAN_ORDER, RK+ 8, (X[2]) ^ ((Y[(Q+2)%4])>>R) ^ ((Y[(Q+1)%4])<<(32-R))); RK[2] = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, (X[2]) ^ ((Y[(Q+2)%4])>>R) ^ ((Y[(Q+1)%4])<<(32-R)));
PutWord(false, LITTLE_ENDIAN_ORDER, RK+12, (X[3]) ^ ((Y[(Q+3)%4])>>R) ^ ((Y[(Q+2)%4])<<(32-R))); RK[3] = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, (X[3]) ^ ((Y[(Q+3)%4])>>R) ^ ((Y[(Q+2)%4])<<(32-R)));
#else #else
PutWord(false, BIG_ENDIAN_ORDER, RK+ 0, (X[0]) ^ ((Y[(Q )%4])>>R) ^ ((Y[(Q+3)%4])<<(32-R))); RK[0] = ConditionalByteReverse(BIG_ENDIAN_ORDER, (X[0]) ^ ((Y[(Q )%4])>>R) ^ ((Y[(Q+3)%4])<<(32-R)));
PutWord(false, BIG_ENDIAN_ORDER, RK+ 4, (X[1]) ^ ((Y[(Q+1)%4])>>R) ^ ((Y[(Q )%4])<<(32-R))); RK[1] = ConditionalByteReverse(BIG_ENDIAN_ORDER, (X[1]) ^ ((Y[(Q+1)%4])>>R) ^ ((Y[(Q )%4])<<(32-R)));
PutWord(false, BIG_ENDIAN_ORDER, RK+ 8, (X[2]) ^ ((Y[(Q+2)%4])>>R) ^ ((Y[(Q+1)%4])<<(32-R))); RK[2] = ConditionalByteReverse(BIG_ENDIAN_ORDER, (X[2]) ^ ((Y[(Q+2)%4])>>R) ^ ((Y[(Q+1)%4])<<(32-R)));
PutWord(false, BIG_ENDIAN_ORDER, RK+12, (X[3]) ^ ((Y[(Q+3)%4])>>R) ^ ((Y[(Q+2)%4])<<(32-R))); RK[3] = ConditionalByteReverse(BIG_ENDIAN_ORDER, (X[3]) ^ ((Y[(Q+3)%4])>>R) ^ ((Y[(Q+2)%4])<<(32-R)));
#endif #endif
} }
@ -114,10 +110,9 @@ void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const Nam
{ {
CRYPTOPP_UNUSED(params); CRYPTOPP_UNUSED(params);
m_rk.New(16*17); // round keys m_rk.New(4*17); // round keys
m_w.New(4*7+4); // w0, w1, w2, w3, t and u m_w.New(4*28); // w0, w1, w2, w3, t and u
byte *rk = m_rk.data();
int Q, q, R, r; int Q, q, R, r;
switch (keylen) switch (keylen)
@ -140,6 +135,7 @@ void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const Nam
} }
// w0 has room for 32 bytes. w1-w3 each has room for 16 bytes. t and u are 16 byte temp areas. // w0 has room for 32 bytes. w1-w3 each has room for 16 bytes. t and u are 16 byte temp areas.
// The storage requrements for w0-w3, t and u are 112 bytes or 28 words.
word32 *w0 = m_w.data(), *w1 = m_w.data()+8, *w2 = m_w.data()+12, *w3 = m_w.data()+16, *t = m_w.data()+20; word32 *w0 = m_w.data(), *w1 = m_w.data()+8, *w2 = m_w.data()+12, *w3 = m_w.data()+16, *t = m_w.data()+20;
GetBlock<word32, BigEndian, false>block(key); GetBlock<word32, BigEndian, false>block(key);
@ -181,29 +177,29 @@ void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const Nam
w3[0]=t[0]^w1[0]; w3[1]=t[1]^w1[1]; w3[2]=t[2]^w1[2]; w3[3]=t[3]^w1[3]; w3[0]=t[0]^w1[0]; w3[1]=t[1]^w1[1]; w3[2]=t[2]^w1[2]; w3[3]=t[3]^w1[3];
ARIA_GSRK<19>(w0, w1, rk + 0); ARIA_GSRK<19>(w0, w1, m_rk + 0);
ARIA_GSRK<19>(w1, w2, rk + 16); ARIA_GSRK<19>(w1, w2, m_rk + 4);
ARIA_GSRK<19>(w2, w3, rk + 32); ARIA_GSRK<19>(w2, w3, m_rk + 8);
ARIA_GSRK<19>(w3, w0, rk + 48); ARIA_GSRK<19>(w3, w0, m_rk + 12);
ARIA_GSRK<31>(w0, w1, rk + 64); ARIA_GSRK<31>(w0, w1, m_rk + 16);
ARIA_GSRK<31>(w1, w2, rk + 80); ARIA_GSRK<31>(w1, w2, m_rk + 20);
ARIA_GSRK<31>(w2, w3, rk + 96); ARIA_GSRK<31>(w2, w3, m_rk + 24);
ARIA_GSRK<31>(w3, w0, rk + 112); ARIA_GSRK<31>(w3, w0, m_rk + 28);
ARIA_GSRK<67>(w0, w1, rk + 128); ARIA_GSRK<67>(w0, w1, m_rk + 32);
ARIA_GSRK<67>(w1, w2, rk + 144); ARIA_GSRK<67>(w1, w2, m_rk + 36);
ARIA_GSRK<67>(w2, w3, rk + 160); ARIA_GSRK<67>(w2, w3, m_rk + 40);
ARIA_GSRK<67>(w3, w0, rk + 176); ARIA_GSRK<67>(w3, w0, m_rk + 44);
ARIA_GSRK<97>(w0, w1, rk + 192); ARIA_GSRK<97>(w0, w1, m_rk + 48);
if (keylen > 16) if (keylen > 16)
{ {
ARIA_GSRK<97>(w1, w2, rk + 208); ARIA_GSRK<97>(w1, w2, m_rk + 52);
ARIA_GSRK<97>(w2, w3, rk + 224); ARIA_GSRK<97>(w2, w3, m_rk + 56);
if (keylen > 24) if (keylen > 24)
{ {
ARIA_GSRK< 97>(w3, w0, rk + 240); ARIA_GSRK< 97>(w3, w0, m_rk + 60);
ARIA_GSRK<109>(w0, w1, rk + 256); ARIA_GSRK<109>(w0, w1, m_rk + 64);
} }
} }
@ -211,10 +207,10 @@ void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const Nam
if (!IsForwardTransformation()) if (!IsForwardTransformation())
{ {
word32 *a, *z, *s; word32 *a, *z, *s;
rk = m_rk.data();
r = R; q = Q; r = R; q = Q;
a=UINT32_CAST(rk); s=m_w.data()+24; z=a+r*4; // s reuses w0 temp area
a=m_rk.data(); s=m_w.data()+0; z=a+r*4;
std::memcpy(t, a, 16); std::memcpy(a, z, 16); std::memcpy(z, t, 16); std::memcpy(t, a, 16); std::memcpy(a, z, 16); std::memcpy(z, t, 16);
a+=4; z-=4; a+=4; z-=4;

2
aria.h
View File

@ -53,7 +53,7 @@ public:
typedef SecBlock<byte, AllocatorWithCleanup<byte, true> > AlignedByteBlock; typedef SecBlock<byte, AllocatorWithCleanup<byte, true> > AlignedByteBlock;
typedef SecBlock<word32, AllocatorWithCleanup<word32, true> > AlignedWordBlock; typedef SecBlock<word32, AllocatorWithCleanup<word32, true> > AlignedWordBlock;
AlignedByteBlock m_rk; // round keys AlignedWordBlock m_rk; // round keys
AlignedWordBlock m_w; // w0, w1, w2, w3, t and u AlignedWordBlock m_w; // w0, w1, w2, w3, t and u
unsigned int m_rounds; unsigned int m_rounds;
}; };