tweaks/fixes for 5.6

This commit is contained in:
weidai 2009-03-03 03:28:39 +00:00
parent 7d88bbd9ed
commit 8565900724
11 changed files with 301 additions and 298 deletions

View File

@ -414,6 +414,7 @@ the mailing list.
5.6 - added AuthenticatedSymmetricCipher interface class and Filter wrappers
- added CCM, GCM (with SSE2 assembly), CMAC, and SEED
- added support for variable length IVs
- improved AES speed on x86 and x64
- fixed run-time validation error on x86-64 with GCC 4.3.2 -O2
- fixed HashFilter bug when putMessage=true

View File

@ -104,7 +104,7 @@ void AuthenticatedSymmetricCipherBase::Update(const byte *input, size_t length)
void AuthenticatedSymmetricCipherBase::ProcessData(byte *outString, const byte *inString, size_t length)
{
m_totalMessageLength += length;
if (m_totalMessageLength > MaxMessageLength())
if (m_state >= State_IVSet && m_totalMessageLength > MaxMessageLength())
throw InvalidArgument(AlgorithmName() + ": message length exceeds maximum");
reswitch:

10
ccm.h
View File

@ -73,7 +73,7 @@ protected:
};
//! .
template <class T_BlockCipher, int T_DefaultDigestBitSize, bool T_IsEncryption>
template <class T_BlockCipher, int T_DefaultDigestSize, bool T_IsEncryption>
class CCM_Final : public CCM_Base
{
public:
@ -84,16 +84,16 @@ public:
private:
BlockCipher & AccessBlockCipher() {return m_cipher;}
int DefaultDigestSize() const {return T_DefaultDigestBitSize/8;}
int DefaultDigestSize() const {return T_DefaultDigestSize;}
typename T_BlockCipher::Encryption m_cipher;
};
/// <a href="http://www.cryptolounge.org/wiki/CCM">CCM</a>
template <class T_BlockCipher, int T_DefaultDigestBitSize = 128>
template <class T_BlockCipher, int T_DefaultDigestSize = 16>
struct CCM : public AuthenticatedSymmetricCipherDocumentation
{
typedef CCM_Final<T_BlockCipher, T_DefaultDigestBitSize, true> Encryption;
typedef CCM_Final<T_BlockCipher, T_DefaultDigestBitSize, false> Decryption;
typedef CCM_Final<T_BlockCipher, T_DefaultDigestSize, true> Encryption;
typedef CCM_Final<T_BlockCipher, T_DefaultDigestSize, false> Decryption;
};
NAMESPACE_END

View File

@ -205,6 +205,24 @@ void AuthenticatedSymmetricCipher::SpecifyDataLengths(lword headerLength, lword
UncheckedSpecifyDataLengths(headerLength, messageLength, footerLength);
}
// One-shot authenticated encryption: resynchronize with the given IV,
// declare the AAD/message lengths up front (required by modes for which
// NeedsPrespecifiedDataLengths() returns true), feed the header (AAD),
// encrypt the message into ciphertext, then emit the MAC.
// The MAC is truncated to macSize bytes if macSize < TagSize().
void AuthenticatedSymmetricCipher::EncryptAndAuthenticate(byte *ciphertext, byte *mac, size_t macSize, const byte *iv, int ivLength, const byte *header, size_t headerLength, const byte *message, size_t messageLength)
{
Resynchronize(iv, ivLength);
SpecifyDataLengths(headerLength, messageLength);
Update(header, headerLength);
ProcessString(ciphertext, message, messageLength);
TruncatedFinal(mac, macSize);
}
// One-shot authenticated decryption: mirrors EncryptAndAuthenticate, then
// verifies the MAC.  Returns true iff the macLength-byte (possibly
// truncated) MAC matches.  Note the plaintext is written into 'message'
// before verification completes, so callers MUST check the return value
// before trusting the output.
bool AuthenticatedSymmetricCipher::DecryptAndVerify(byte *message, const byte *mac, size_t macLength, const byte *iv, int ivLength, const byte *header, size_t headerLength, const byte *ciphertext, size_t ciphertextLength)
{
Resynchronize(iv, ivLength);
SpecifyDataLengths(headerLength, ciphertextLength);
Update(header, headerLength);
ProcessString(message, ciphertext, ciphertextLength);
return TruncatedVerify(mac, macLength);
}
unsigned int RandomNumberGenerator::GenerateBit()
{
return GenerateByte() & 1;

View File

@ -377,10 +377,10 @@ public:
//! calls SetKey() with an NameValuePairs object that just specifies "Rounds"
void SetKeyWithRounds(const byte *key, size_t length, int rounds);
//! calls SetKey() with an NameValuePairs object that just specifies "IVWithLength"
//! calls SetKey() with an NameValuePairs object that just specifies "IV"
void SetKeyWithIV(const byte *key, size_t length, const byte *iv, size_t ivLength);
//! calls SetKey() with an NameValuePairs object that just specifies "IVWithLength"
//! calls SetKey() with an NameValuePairs object that just specifies "IV"
void SetKeyWithIV(const byte *key, size_t length, const byte *iv)
{SetKeyWithIV(key, length, iv, IVSize());}
@ -560,9 +560,13 @@ public:
virtual void Restart()
{TruncatedFinal(NULL, 0);}
//! size of the hash returned by Final()
//! size of the hash/digest/MAC returned by Final()
virtual unsigned int DigestSize() const =0;
//! same as DigestSize()
unsigned int TagSize() const {return DigestSize();}
//! block size of underlying compression function, or 0 if not block based
virtual unsigned int BlockSize() const {return 0;}
@ -641,9 +645,6 @@ public:
explicit BadState(const std::string &name, const char *function, const char *state) : Exception(OTHER_ERROR, name + ": " + function + " was called before " + state) {}
};
// redeclare this to avoid compiler ambiguity errors
virtual std::string AlgorithmName() const =0;
//! the maximum length of AAD that can be input before the encrypted data
virtual lword MaxHeaderLength() const =0;
//! the maximum length of encrypted data
@ -655,6 +656,13 @@ public:
virtual bool NeedsPrespecifiedDataLengths() const {return false;}
//! this function only needs to be called if NeedsPrespecifiedDataLengths() returns true
void SpecifyDataLengths(lword headerLength, lword messageLength, lword footerLength=0);
//! encrypt and generate MAC in one call. will truncate MAC if macSize < TagSize()
virtual void EncryptAndAuthenticate(byte *ciphertext, byte *mac, size_t macSize, const byte *iv, int ivLength, const byte *header, size_t headerLength, const byte *message, size_t messageLength);
//! decrypt and verify MAC in one call, returning true iff MAC is valid. will assume MAC is truncated if macLength < TagSize()
virtual bool DecryptAndVerify(byte *message, const byte *mac, size_t macLength, const byte *iv, int ivLength, const byte *header, size_t headerLength, const byte *ciphertext, size_t ciphertextLength);
// redeclare this to avoid compiler ambiguity errors
virtual std::string AlgorithmName() const =0;
protected:
const Algorithm & GetAlgorithm() const {return *static_cast<const MessageAuthenticationCode *>(this);}

View File

@ -397,7 +397,7 @@ void TestAuthenticatedSymmetricCipher(TestData &v, const NameValuePairs &overrid
std::string encrypted, decrypted;
AuthenticatedEncryptionFilter ef(*asc1, new StringSink(encrypted));
AuthenticatedDecryptionFilter df(*asc2, new StringSink(decrypted), AuthenticatedDecryptionFilter::DEFAULT_PADDING, AuthenticatedDecryptionFilter::MAC_AT_BEGIN);
AuthenticatedDecryptionFilter df(*asc2, new StringSink(decrypted), AuthenticatedDecryptionFilter::MAC_AT_BEGIN);
if (asc1->NeedsPrespecifiedDataLengths())
{

View File

@ -540,6 +540,18 @@ size_t ArrayXorSink::Put2(const byte *begin, size_t length, int messageEnd, bool
// *************************************************************
// Buffers input and applies the stream transformation with the requested
// block-padding scheme.  allowAuthenticatedSymmetricCipher lets the
// authenticated-encryption filter wrappers (which build on this filter and
// pass true) bypass the guard below; ordinary callers passing an AEAD
// cipher are rejected so the MAC handling is not silently skipped.
StreamTransformationFilter::StreamTransformationFilter(StreamTransformation &c, BufferedTransformation *attachment, BlockPaddingScheme padding, bool allowAuthenticatedSymmetricCipher)
: FilterWithBufferedInput(attachment)
, m_cipher(c)
{
// a nonzero MinLastBlockSize that is not larger than the mandatory block
// size would make the last-block buffering logic inconsistent
assert(c.MinLastBlockSize() == 0 || c.MinLastBlockSize() > c.MandatoryBlockSize());
// guard: AEAD ciphers must go through the authenticated filters
if (!allowAuthenticatedSymmetricCipher && dynamic_cast<AuthenticatedSymmetricCipher *>(&c) != 0)
throw InvalidArgument("StreamTransformationFilter: please use AuthenticatedEncryptionFilter and AuthenticatedDecryptionFilter for AuthenticatedSymmetricCipher");
IsolatedInitialize(MakeParameters(Name::BlockPaddingScheme(), padding));
}
size_t StreamTransformationFilter::LastBlockSize(StreamTransformation &c, BlockPaddingScheme padding)
{
if (c.MinLastBlockSize() > 0)
@ -550,15 +562,6 @@ size_t StreamTransformationFilter::LastBlockSize(StreamTransformation &c, BlockP
return 0;
}
// Older three-argument constructor: same initialization as the four-argument
// form but without the AEAD guard parameter.
StreamTransformationFilter::StreamTransformationFilter(StreamTransformation &c, BufferedTransformation *attachment, BlockPaddingScheme padding)
: FilterWithBufferedInput(attachment)
, m_cipher(c)
{
// sanity check on the transformation's last-block-size contract
assert(c.MinLastBlockSize() == 0 || c.MinLastBlockSize() > c.MandatoryBlockSize());
IsolatedInitialize(MakeParameters(Name::BlockPaddingScheme(), padding));
}
void StreamTransformationFilter::InitializeDerivedAndReturnNewSizes(const NameValuePairs &parameters, size_t &firstSize, size_t &blockSize, size_t &lastSize)
{
BlockPaddingScheme padding = parameters.GetValueWithDefault(Name::BlockPaddingScheme(), DEFAULT_PADDING);
@ -804,8 +807,8 @@ void HashVerificationFilter::LastPut(const byte *inString, size_t length)
// *************************************************************
AuthenticatedEncryptionFilter::AuthenticatedEncryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment,
BlockPaddingScheme padding, bool putMessage, int truncatedDigestSize, const std::string &macChannel)
: StreamTransformationFilter(c, attachment, padding)
bool putMessage, int truncatedDigestSize, const std::string &macChannel, BlockPaddingScheme padding)
: StreamTransformationFilter(c, attachment, padding, true)
, m_hf(c, new OutputProxy(*this, false), putMessage, truncatedDigestSize, "AAD", macChannel)
{
assert(c.IsForwardTransformation());
@ -847,10 +850,10 @@ void AuthenticatedEncryptionFilter::LastPut(const byte *inString, size_t length)
// *************************************************************
AuthenticatedDecryptionFilter::AuthenticatedDecryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment, BlockPaddingScheme padding, word32 flags, int truncatedDigestSize)
AuthenticatedDecryptionFilter::AuthenticatedDecryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment, word32 flags, int truncatedDigestSize, BlockPaddingScheme padding)
: FilterWithBufferedInput(attachment)
, m_hashVerifier(c, new OutputProxy(*this, false))
, m_streamFilter(c, new OutputProxy(*this, false))
, m_streamFilter(c, new OutputProxy(*this, false), padding, true)
{
assert(!c.IsForwardTransformation() || c.IsSelfInverting());
IsolatedInitialize(MakeParameters(Name::BlockPaddingScheme(), padding)(Name::AuthenticatedDecryptionFilterFlags(), flags)(Name::TruncatedDigestSize(), truncatedDigestSize));

View File

@ -263,7 +263,7 @@ public:
/*! DEFAULT_PADDING means PKCS_PADDING if c.MandatoryBlockSize() > 1 && c.MinLastBlockSize() == 0 (e.g. ECB or CBC mode),
otherwise NO_PADDING (OFB, CFB, CTR, CBC-CTS modes).
See http://www.weidai.com/scan-mirror/csp.html for details of the padding schemes. */
StreamTransformationFilter(StreamTransformation &c, BufferedTransformation *attachment = NULL, BlockPaddingScheme padding = DEFAULT_PADDING);
StreamTransformationFilter(StreamTransformation &c, BufferedTransformation *attachment = NULL, BlockPaddingScheme padding = DEFAULT_PADDING, bool allowAuthenticatedSymmetricCipher = false);
std::string AlgorithmName() const {return m_cipher.AlgorithmName();}
@ -345,7 +345,7 @@ class CRYPTOPP_DLL AuthenticatedEncryptionFilter : public StreamTransformationFi
{
public:
/*! See StreamTransformationFilter for documentation on BlockPaddingScheme */
AuthenticatedEncryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment = NULL, BlockPaddingScheme padding = DEFAULT_PADDING, bool putMessage=false, int truncatedDigestSize=-1, const std::string &macChannel=NULL_CHANNEL);
AuthenticatedEncryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment = NULL, bool putMessage=false, int truncatedDigestSize=-1, const std::string &macChannel=NULL_CHANNEL, BlockPaddingScheme padding = DEFAULT_PADDING);
void IsolatedInitialize(const NameValuePairs &parameters);
byte * ChannelCreatePutSpace(const std::string &channel, size_t &size);
@ -364,7 +364,7 @@ public:
enum Flags {MAC_AT_BEGIN=1, THROW_EXCEPTION=16, DEFAULT_FLAGS = THROW_EXCEPTION};
/*! See StreamTransformationFilter for documentation on BlockPaddingScheme */
AuthenticatedDecryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment = NULL, BlockPaddingScheme padding = DEFAULT_PADDING, word32 flags = DEFAULT_FLAGS, int truncatedDigestSize=-1);
AuthenticatedDecryptionFilter(AuthenticatedSymmetricCipher &c, BufferedTransformation *attachment = NULL, word32 flags = DEFAULT_FLAGS, int truncatedDigestSize=-1, BlockPaddingScheme padding = DEFAULT_PADDING);
std::string AlgorithmName() const {return m_hashVerifier.AlgorithmName();}
byte * ChannelCreatePutSpace(const std::string &channel, size_t &size);

View File

@ -97,11 +97,11 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
{
s_reductionTable[0] = 0;
word16 x = 0x01c2;
s_reductionTable[1] = ConditionalByteReverse(BIG_ENDIAN_ORDER, x);
s_reductionTable[1] = ByteReverse(x);
for (int i=2; i<=0x80; i*=2)
{
x <<= 1;
s_reductionTable[i] = ConditionalByteReverse(BIG_ENDIAN_ORDER, x);
s_reductionTable[i] = ByteReverse(x);
for (int j=1; j<i; j++)
s_reductionTable[i+j] = s_reductionTable[i] ^ s_reductionTable[j];
}
@ -198,7 +198,7 @@ void GCM_AuthenticateBlocks_64K(const byte *data, size_t blocks, word64 *hashBuf
size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
{
typedef BlockGetAndPut<word64, NativeByteOrder, false, true> Block;
typedef BlockGetAndPut<word64, NativeByteOrder> Block;
word64 *hashBuffer = (word64 *)HashBuffer();
switch (2*(m_buffer.size()>=64*1024)

View File

@ -5,17 +5,20 @@
// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM rijndael.cpp" to generate MASM code
/*
The assembly code was rewritten in Feb 2009 by Wei Dai to do counter mode
Feb 2009: The x86/x64 assembly code was rewritten by Wei Dai to do counter mode
caching, which was invented by Hongjun Wu and popularized by Daniel J. Bernstein
and Peter Schwabe in their paper "New AES software speed records". The round
function was also modified to include a trick similar to one in Brian Gladman's
x86 assembly code, doing an 8-bit register move to minimize the number of
register spills. Also switched to compressed tables and copying round keys to
the stack.
The C++ implementation now uses compressed tables if
CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS is defined.
*/
/*
Defense against timing attacks was added in July 2006 by Wei Dai.
July 2006: Defense against timing attacks was added by Wei Dai.
The code now uses smaller tables in the first and last rounds,
and preloads them into L1 cache before usage (by loading at least
@ -75,12 +78,66 @@ using namespace rdtable;
#else
static word64 Te[256];
#endif
static word32 Td[256*4];
static word64 Td[256];
#else
static word32 Te[256*4], Td[256*4];
#endif
static bool s_TeFilled = false, s_TdFilled = false;
// ************************* Portable Code ************************************
#define QUARTER_ROUND(L, T, t, a, b, c, d) \
a ^= L(T, 3, byte(t)); t >>= 8;\
b ^= L(T, 2, byte(t)); t >>= 8;\
c ^= L(T, 1, byte(t)); t >>= 8;\
d ^= L(T, 0, t);
#define QUARTER_ROUND_LE(t, a, b, c, d) \
tempBlock[a] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
tempBlock[b] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
tempBlock[c] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
tempBlock[d] = ((byte *)(Te+t))[1];
#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
#define QUARTER_ROUND_LD(t, a, b, c, d) \
tempBlock[a] = ((byte *)(Td+byte(t)))[GetNativeByteOrder()*7]; t >>= 8;\
tempBlock[b] = ((byte *)(Td+byte(t)))[GetNativeByteOrder()*7]; t >>= 8;\
tempBlock[c] = ((byte *)(Td+byte(t)))[GetNativeByteOrder()*7]; t >>= 8;\
tempBlock[d] = ((byte *)(Td+t))[GetNativeByteOrder()*7];
#else
#define QUARTER_ROUND_LD(t, a, b, c, d) \
tempBlock[a] = Sd[byte(t)]; t >>= 8;\
tempBlock[b] = Sd[byte(t)]; t >>= 8;\
tempBlock[c] = Sd[byte(t)]; t >>= 8;\
tempBlock[d] = Sd[t];
#endif
#define QUARTER_ROUND_E(t, a, b, c, d) QUARTER_ROUND(TL_M, Te, t, a, b, c, d)
#define QUARTER_ROUND_D(t, a, b, c, d) QUARTER_ROUND(TL_M, Td, t, a, b, c, d)
#ifdef IS_LITTLE_ENDIAN
#define QUARTER_ROUND_FE(t, a, b, c, d) QUARTER_ROUND(TL_F, Te, t, d, c, b, a)
#define QUARTER_ROUND_FD(t, a, b, c, d) QUARTER_ROUND(TL_F, Td, t, d, c, b, a)
#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
#define TL_F(T, i, x) (*(word32 *)((byte *)T + x*8 + (6-i)%4+1))
#define TL_M(T, i, x) (*(word32 *)((byte *)T + x*8 + (i+3)%4+1))
#else
#define TL_F(T, i, x) rotrFixed(T[x], (3-i)*8)
#define TL_M(T, i, x) T[i*256 + x]
#endif
#else
#define QUARTER_ROUND_FE(t, a, b, c, d) QUARTER_ROUND(TL_F, Te, t, a, b, c, d)
#define QUARTER_ROUND_FD(t, a, b, c, d) QUARTER_ROUND(TL_F, Td, t, a, b, c, d)
#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
#define TL_F(T, i, x) (*(word32 *)((byte *)T + x*8 + (4-i)%4))
#define TL_M TL_F
#else
#define TL_F(T, i, x) rotrFixed(T[x], i*8)
#define TL_M(T, i, x) T[i*256 + x]
#endif
#endif
#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
@ -108,7 +165,7 @@ void Rijndael::Base::FillEncTable()
}
#endif
}
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
Te[256] = Te[257] = 0;
#endif
s_TeFilled = true;
@ -119,7 +176,7 @@ void Rijndael::Base::FillDecTable()
for (int i=0; i<256; i++)
{
byte x = Sd[i];
#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS_
#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
word32 y = word32(fd(x))<<8 | word32(f9(x))<<16 | word32(fe(x))<<24;
Td[i] = word64(y | fb(x))<<32 | y | x;
#else
@ -202,29 +259,16 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, c
temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
}
#define InverseMixColumn(x) x = TL_M(Td, 0, Se[GETBYTE(x, 3)]) ^ TL_M(Td, 1, Se[GETBYTE(x, 2)]) ^ TL_M(Td, 2, Se[GETBYTE(x, 1)]) ^ TL_M(Td, 3, Se[GETBYTE(x, 0)])
/* apply the inverse MixColumn transform to all round keys but the first and the last: */
for (i = 1; i < m_rounds; i++) {
rk += 4;
rk[0] =
Td[0*256+Se[GETBYTE(rk[0], 3)]] ^
Td[1*256+Se[GETBYTE(rk[0], 2)]] ^
Td[2*256+Se[GETBYTE(rk[0], 1)]] ^
Td[3*256+Se[GETBYTE(rk[0], 0)]];
rk[1] =
Td[0*256+Se[GETBYTE(rk[1], 3)]] ^
Td[1*256+Se[GETBYTE(rk[1], 2)]] ^
Td[2*256+Se[GETBYTE(rk[1], 1)]] ^
Td[3*256+Se[GETBYTE(rk[1], 0)]];
rk[2] =
Td[0*256+Se[GETBYTE(rk[2], 3)]] ^
Td[1*256+Se[GETBYTE(rk[2], 2)]] ^
Td[2*256+Se[GETBYTE(rk[2], 1)]] ^
Td[3*256+Se[GETBYTE(rk[2], 0)]];
rk[3] =
Td[0*256+Se[GETBYTE(rk[3], 3)]] ^
Td[1*256+Se[GETBYTE(rk[3], 2)]] ^
Td[2*256+Se[GETBYTE(rk[3], 1)]] ^
Td[3*256+Se[GETBYTE(rk[3], 0)]];
InverseMixColumn(rk[0]);
InverseMixColumn(rk[1]);
InverseMixColumn(rk[2]);
InverseMixColumn(rk[3]);
}
}
@ -232,6 +276,163 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keylen, c
ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key + m_rounds*4, m_key + m_rounds*4, 16);
}
// Encrypt one 16-byte block (portable table-based path), XORing the result
// with xorBlock if Block::Put does so for a non-null xorBlock.  Dispatches
// to the SSE2/x64 assembly implementation when it is compiled in and the
// CPU supports it.  Includes the cache-timing countermeasure described in
// the comments at the top of this file.
void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
if (HasSSE2())
{
// hand this single block to the batched assembly routine
Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
return;
}
#endif
typedef BlockGetAndPut<word32, NativeByteOrder> Block;
word32 s0, s1, s2, s3, t0, t1, t2, t3;
Block::Get(inBlock)(s0)(s1)(s2)(s3);
// initial AddRoundKey; t0..t3 preload the second round key
const word32 *rk = m_key;
s0 ^= rk[0];
s1 ^= rk[1];
s2 ^= rk[2];
s3 ^= rk[3];
t0 = rk[4];
t1 = rk[5];
t2 = rk[6];
t3 = rk[7];
rk += 8;
// timing attack countermeasure. see comments at top for more details.
// u starts at 0 and only ever has &= applied, so it stays 0 and the ORs
// below do not change the state; the loop's real purpose is to touch
// every cache line of Te so the data-dependent lookups in the rounds
// hit a uniformly warmed cache.  The table span preloaded (2048 vs 1024
// bytes) matches the Te layout selected by the unaligned-access macro.
const int cacheLineSize = GetCacheLineSize();
unsigned int i;
word32 u = 0;
#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
for (i=0; i<2048; i+=cacheLineSize)
#else
for (i=0; i<1024; i+=cacheLineSize)
#endif
u &= *(const word32 *)(((const byte *)Te)+i);
u &= Te[255];
s0 |= u; s1 |= u; s2 |= u; s3 |= u;
// first round, using the TL_F table-load form (see macros above)
QUARTER_ROUND_FE(s3, t0, t1, t2, t3)
QUARTER_ROUND_FE(s2, t3, t0, t1, t2)
QUARTER_ROUND_FE(s1, t2, t3, t0, t1)
QUARTER_ROUND_FE(s0, t1, t2, t3, t0)
// Nr - 2 full rounds, two rounds per loop iteration:
unsigned int r = m_rounds/2 - 1;
do
{
s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
QUARTER_ROUND_E(t3, s0, s1, s2, s3)
QUARTER_ROUND_E(t2, s3, s0, s1, s2)
QUARTER_ROUND_E(t1, s2, s3, s0, s1)
QUARTER_ROUND_E(t0, s1, s2, s3, s0)
t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
QUARTER_ROUND_E(s3, t0, t1, t2, t3)
QUARTER_ROUND_E(s2, t3, t0, t1, t2)
QUARTER_ROUND_E(s1, t2, t3, t0, t1)
QUARTER_ROUND_E(s0, t1, t2, t3, t0)
rk += 8;
} while (--r);
// final round: substitution bytes are read from within the Te entries
// (QUARTER_ROUND_LE), scattered into tempBlock in output byte order
word32 tbw[4];
byte *const tempBlock = (byte *)tbw;
QUARTER_ROUND_LE(t2, 15, 2, 5, 8)
QUARTER_ROUND_LE(t1, 11, 14, 1, 4)
QUARTER_ROUND_LE(t0, 7, 10, 13, 0)
QUARTER_ROUND_LE(t3, 3, 6, 9, 12)
// final AddRoundKey, combined with the optional XOR mask on output
Block::Put(xorBlock, outBlock)(tbw[0]^rk[0])(tbw[1]^rk[1])(tbw[2]^rk[2])(tbw[3]^rk[3]);
}
// Decrypt one 16-byte block (portable table-based path), with the same
// cache-timing countermeasures as the encryption side.  No assembly
// dispatch here — only the decryption tables/macros differ.
void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
typedef BlockGetAndPut<word32, NativeByteOrder> Block;
word32 s0, s1, s2, s3, t0, t1, t2, t3;
Block::Get(inBlock)(s0)(s1)(s2)(s3);
// initial AddRoundKey; t0..t3 preload the second round key
const word32 *rk = m_key;
s0 ^= rk[0];
s1 ^= rk[1];
s2 ^= rk[2];
s3 ^= rk[3];
t0 = rk[4];
t1 = rk[5];
t2 = rk[6];
t3 = rk[7];
rk += 8;
// timing attack countermeasure. see comments at top for more details.
// u stays 0 (only &= is applied), so the ORs are no-ops; the loop just
// pulls every cache line of Td into cache before the keyed lookups.
const int cacheLineSize = GetCacheLineSize();
unsigned int i;
word32 u = 0;
#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
for (i=0; i<2048; i+=cacheLineSize)
#else
for (i=0; i<1024; i+=cacheLineSize)
#endif
u &= *(const word32 *)(((const byte *)Td)+i);
u &= Td[255];
s0 |= u; s1 |= u; s2 |= u; s3 |= u;
// first round, using the TL_F table-load form (see macros above)
QUARTER_ROUND_FD(s3, t2, t1, t0, t3)
QUARTER_ROUND_FD(s2, t1, t0, t3, t2)
QUARTER_ROUND_FD(s1, t0, t3, t2, t1)
QUARTER_ROUND_FD(s0, t3, t2, t1, t0)
// Nr - 2 full rounds, two rounds per loop iteration:
unsigned int r = m_rounds/2 - 1;
do
{
s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
QUARTER_ROUND_D(t3, s2, s1, s0, s3)
QUARTER_ROUND_D(t2, s1, s0, s3, s2)
QUARTER_ROUND_D(t1, s0, s3, s2, s1)
QUARTER_ROUND_D(t0, s3, s2, s1, s0)
t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
QUARTER_ROUND_D(s3, t2, t1, t0, t3)
QUARTER_ROUND_D(s2, t1, t0, t3, t2)
QUARTER_ROUND_D(s1, t0, t3, t2, t1)
QUARTER_ROUND_D(s0, t3, t2, t1, t0)
rk += 8;
} while (--r);
#ifndef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
// timing attack countermeasure. see comments at top for more details
// If CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS is defined,
// QUARTER_ROUND_LD will use Td, which is already preloaded.
u = 0;
for (i=0; i<256; i+=cacheLineSize)
u &= *(const word32 *)(Sd+i);
u &= *(const word32 *)(Sd+252);
t0 |= u; t1 |= u; t2 |= u; t3 |= u;
#endif
// final round: inverse S-box substitution into tempBlock in output order
word32 tbw[4];
byte *const tempBlock = (byte *)tbw;
QUARTER_ROUND_LD(t2, 7, 2, 13, 8)
QUARTER_ROUND_LD(t1, 3, 14, 9, 4)
QUARTER_ROUND_LD(t0, 15, 10, 5, 0)
QUARTER_ROUND_LD(t3, 11, 6, 1, 12)
// final AddRoundKey, combined with the optional XOR mask on output
Block::Put(xorBlock, outBlock)(tbw[0]^rk[0])(tbw[1]^rk[1])(tbw[2]^rk[2])(tbw[3]^rk[3]);
}
// ************************* Assembly Code ************************************
#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
@ -750,247 +951,6 @@ size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xo
#endif
// Earlier portable AES encryption of one block.  Reads and writes the
// block through direct word32 pointer casts (assumes suitably aligned,
// native-endian data — TODO confirm against callers) and defines its
// table-lookup macros inline, redefining TL between the first round and
// the main rounds.
void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
if (HasSSE2())
{
// hand this single block to the batched assembly routine
Rijndael::Enc::AdvancedProcessBlocks(inBlock, xorBlock, outBlock, 16, 0);
return;
}
#endif
word32 s0, s1, s2, s3, t0, t1, t2, t3;
const word32 *rk = m_key;
// initial AddRoundKey; input read via word32 casts
s0 = ((const word32 *)inBlock)[0] ^ rk[0];
s1 = ((const word32 *)inBlock)[1] ^ rk[1];
s2 = ((const word32 *)inBlock)[2] ^ rk[2];
s3 = ((const word32 *)inBlock)[3] ^ rk[3];
t0 = rk[4];
t1 = rk[5];
t2 = rk[6];
t3 = rk[7];
rk += 8;
// timing attack countermeasure. see comments at top for more details
// (u stays 0; the loop only preloads the Te table into cache)
const int cacheLineSize = GetCacheLineSize();
unsigned int i;
word32 u = 0;
#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
for (i=0; i<2048; i+=cacheLineSize)
#else
for (i=0; i<1024; i+=cacheLineSize)
#endif
u &= *(const word32 *)(((const byte *)Te)+i);
u &= Te[255];
s0 |= u; s1 |= u; s2 |= u; s3 |= u;
// table-lookup macros: TL selects the byte-order and aligned/unaligned
// load form; QUARTER_ROUND1 reverses the output order on little-endian
#define QUARTER_ROUND(t, a, b, c, d) \
a ^= TL(3, byte(t)); t >>= 8;\
b ^= TL(2, byte(t)); t >>= 8;\
c ^= TL(1, byte(t)); t >>= 8;\
d ^= TL(0, t);
#ifdef IS_LITTLE_ENDIAN
#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
#define TL(i, x) (*(word32 *)((byte *)Te + x*8 + (6-i)%4+1))
#else
#define TL(i, x) rotrFixed(Te[x], (3-i)*8)
#endif
#define QUARTER_ROUND1(t, a, b, c, d) QUARTER_ROUND(t, d, c, b, a)
#else
#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
#define TL(i, x) (*(word32 *)((byte *)Te + x*8 + (4-i)%4))
#else
#define TL(i, x) rotrFixed(Te[x], i*8)
#endif
#define QUARTER_ROUND1 QUARTER_ROUND
#endif
// first round
QUARTER_ROUND1(s3, t0, t1, t2, t3)
QUARTER_ROUND1(s2, t3, t0, t1, t2)
QUARTER_ROUND1(s1, t2, t3, t0, t1)
QUARTER_ROUND1(s0, t1, t2, t3, t0)
// redefine TL for the main rounds where the form differs
#if defined(CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS) && defined(IS_LITTLE_ENDIAN)
#undef TL
#define TL(i, x) (*(word32 *)((byte *)Te + x*8 + (i+3)%4+1))
#endif
#ifndef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
#undef TL
#define TL(i, x) Te[i*256 + x]
#endif
// Nr - 2 full rounds, two per loop iteration:
unsigned int r = m_rounds/2 - 1;
do
{
s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
QUARTER_ROUND(t3, s0, s1, s2, s3)
QUARTER_ROUND(t2, s3, s0, s1, s2)
QUARTER_ROUND(t1, s2, s3, s0, s1)
QUARTER_ROUND(t0, s1, s2, s3, s0)
t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
QUARTER_ROUND(s3, t0, t1, t2, t3)
QUARTER_ROUND(s2, t3, t0, t1, t2)
QUARTER_ROUND(s1, t2, t3, t0, t1)
QUARTER_ROUND(s0, t1, t2, t3, t0)
#undef QUARTER_ROUND
rk += 8;
} while (--r);
word32 tbw[4];
byte *const tempBlock = (byte *)tbw;
word32 *const obw = (word32 *)outBlock;
const word32 *const xbw = (const word32 *)xorBlock;
// final round: substitution bytes read from within the Te entries
#define QUARTER_ROUND(t, a, b, c, d) \
tempBlock[a] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
tempBlock[b] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
tempBlock[c] = ((byte *)(Te+byte(t)))[1]; t >>= 8;\
tempBlock[d] = ((byte *)(Te+t))[1];
QUARTER_ROUND(t2, 15, 2, 5, 8)
QUARTER_ROUND(t1, 11, 14, 1, 4)
QUARTER_ROUND(t0, 7, 10, 13, 0)
QUARTER_ROUND(t3, 3, 6, 9, 12)
#undef QUARTER_ROUND
// final AddRoundKey; xorBlock may be null, hence the branch
if (xbw)
{
obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
}
else
{
obw[0] = tbw[0] ^ rk[0];
obw[1] = tbw[1] ^ rk[1];
obw[2] = tbw[2] ^ rk[2];
obw[3] = tbw[3] ^ rk[3];
}
}
// Earlier portable AES decryption of one block.  Same structure as the
// encryption side: word32 pointer casts for I/O (alignment assumed —
// TODO confirm against callers), inline macro definitions, cache preload
// of Td and then of the inverse S-box Sd before the final round.
void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
word32 s0, s1, s2, s3, t0, t1, t2, t3;
const word32 *rk = m_key;
// initial AddRoundKey; input read via word32 casts
s0 = ((const word32 *)inBlock)[0] ^ rk[0];
s1 = ((const word32 *)inBlock)[1] ^ rk[1];
s2 = ((const word32 *)inBlock)[2] ^ rk[2];
s3 = ((const word32 *)inBlock)[3] ^ rk[3];
t0 = rk[4];
t1 = rk[5];
t2 = rk[6];
t3 = rk[7];
rk += 8;
// timing attack countermeasure. see comments at top for more details
// (u stays 0; the loop only preloads the Td table into cache)
const int cacheLineSize = GetCacheLineSize();
unsigned int i;
word32 u = 0;
for (i=0; i<1024; i+=cacheLineSize)
u &= *(const word32 *)(((const byte *)Td)+i);
u &= Td[255];
s0 |= u; s1 |= u; s2 |= u; s3 |= u;
// first round
#ifdef IS_BIG_ENDIAN
#define QUARTER_ROUND(t, a, b, c, d) \
a ^= rotrFixed(Td[byte(t)], 24); t >>= 8;\
b ^= rotrFixed(Td[byte(t)], 16); t >>= 8;\
c ^= rotrFixed(Td[byte(t)], 8); t >>= 8;\
d ^= Td[t];
#else
#define QUARTER_ROUND(t, a, b, c, d) \
d ^= Td[byte(t)]; t >>= 8;\
c ^= rotrFixed(Td[byte(t)], 8); t >>= 8;\
b ^= rotrFixed(Td[byte(t)], 16); t >>= 8;\
a ^= rotrFixed(Td[t], 24);
#endif
QUARTER_ROUND(s3, t2, t1, t0, t3)
QUARTER_ROUND(s2, t1, t0, t3, t2)
QUARTER_ROUND(s1, t0, t3, t2, t1)
QUARTER_ROUND(s0, t3, t2, t1, t0)
#undef QUARTER_ROUND
// Nr - 2 full rounds, two per loop iteration, using the four 256-entry
// Td sub-tables directly:
unsigned int r = m_rounds/2 - 1;
do
{
#define QUARTER_ROUND(t, a, b, c, d) \
a ^= Td[3*256+byte(t)]; t >>= 8;\
b ^= Td[2*256+byte(t)]; t >>= 8;\
c ^= Td[1*256+byte(t)]; t >>= 8;\
d ^= Td[t];
s0 = rk[0]; s1 = rk[1]; s2 = rk[2]; s3 = rk[3];
QUARTER_ROUND(t3, s2, s1, s0, s3)
QUARTER_ROUND(t2, s1, s0, s3, s2)
QUARTER_ROUND(t1, s0, s3, s2, s1)
QUARTER_ROUND(t0, s3, s2, s1, s0)
t0 = rk[4]; t1 = rk[5]; t2 = rk[6]; t3 = rk[7];
QUARTER_ROUND(s3, t2, t1, t0, t3)
QUARTER_ROUND(s2, t1, t0, t3, t2)
QUARTER_ROUND(s1, t0, t3, t2, t1)
QUARTER_ROUND(s0, t3, t2, t1, t0)
#undef QUARTER_ROUND
rk += 8;
} while (--r);
// timing attack countermeasure. see comments at top for more details
// (preload the inverse S-box Sd before the final, byte-wise round)
u = 0;
for (i=0; i<256; i+=cacheLineSize)
u &= *(const word32 *)(Sd+i);
u &= *(const word32 *)(Sd+252);
t0 |= u; t1 |= u; t2 |= u; t3 |= u;
word32 tbw[4];
byte *const tempBlock = (byte *)tbw;
word32 *const obw = (word32 *)outBlock;
const word32 *const xbw = (const word32 *)xorBlock;
// final round: inverse S-box substitution into tempBlock in output order
#define QUARTER_ROUND(t, a, b, c, d) \
tempBlock[a] = Sd[byte(t)]; t >>= 8;\
tempBlock[b] = Sd[byte(t)]; t >>= 8;\
tempBlock[c] = Sd[byte(t)]; t >>= 8;\
tempBlock[d] = Sd[t];
QUARTER_ROUND(t2, 7, 2, 13, 8)
QUARTER_ROUND(t1, 3, 14, 9, 4)
QUARTER_ROUND(t0, 15, 10, 5, 0)
QUARTER_ROUND(t3, 11, 6, 1, 12)
#undef QUARTER_ROUND
// final AddRoundKey; xorBlock may be null, hence the branch
if (xbw)
{
obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
}
else
{
obw[0] = tbw[0] ^ rk[0];
obw[1] = tbw[1] ^ rk[1];
obw[2] = tbw[2] ^ rk[2];
obw[3] = tbw[3] ^ rk[3];
}
}
NAMESPACE_END
#endif

View File

@ -158,6 +158,19 @@ bool TestSettings()
pass = false;
}
#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
byte testvals[10] = {1,2,2,3,3,3,3,2,2,1};
if (*(word32 *)(testvals+3) == 0x03030303 && *(word64 *)(testvals+1) == W64LIT(0x0202030303030202))
cout << "passed: Your machine allows unaligned data access.\n";
else
{
cout << "FAILED: Unaligned data access gave incorrect results.\n";
pass = false;
}
#else
cout << "passed: CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS is not defined. Will restrict to aligned data access.\n";
#endif
if (sizeof(byte) == 1)
cout << "passed: ";
else