Mirror of https://github.com/shadps4-emu/ext-cryptopp.git (synced 2025-02-07 13:38:28 +00:00)
Jeffrey Walton
Commit afbd3e60f68f effectively treated a symptom and not the underlying problem. The problem is that linkers on 32-bit systems ignore the CRYPTOPP_ALIGN_DATA(16) request passed down by the compiler and align to 8 bytes or less. We have to use Wei's original code in some places. That is not a bad thing, but the bit fiddling is something we would like to contain a little more by depending more on language or platform features. This commit keeps the original changes, which improve partial specializations, but fixes the 32-bit linker behavior by effectively reverting afbd3e60f68f and e054d36dc88d00. We also add more comments so the next person understands why things are done the way they are.
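As an illustration only, here is a minimal sketch of the kind of pointer bit fiddling the message refers to; the names are hypothetical and this is not the library's actual code. When the linker only guarantees 8-byte alignment for static data, the storage is over-allocated and the pointer is rounded up to the next 16-byte boundary by hand:

```cpp
#include <cstdint>

// Hypothetical backing store with 16 bytes of slack so the pointer can be
// slid forward to a 16-byte boundary regardless of how the linker placed it.
static unsigned char s_tableStorage[1024 + 16];

static void* AlignedTable16()
{
    uintptr_t p = reinterpret_cast<uintptr_t>(s_tableStorage);
    p = (p + 15) & ~static_cast<uintptr_t>(15);  // round up to a multiple of 16
    return reinterpret_cast<void*>(p);
}
```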
283 lines | 6.4 KiB | C++
// tiger.cpp - originally written and placed in the public domain by Wei Dai

#include "pch.h"
#include "config.h"

#include "tiger.h"
#include "misc.h"
#include "cpu.h"

#if defined(CRYPTOPP_DISABLE_TIGER_ASM)
# undef CRYPTOPP_X86_ASM_AVAILABLE
# undef CRYPTOPP_X32_ASM_AVAILABLE
# undef CRYPTOPP_X64_ASM_AVAILABLE
# undef CRYPTOPP_SSE2_ASM_AVAILABLE
#endif

NAMESPACE_BEGIN(CryptoPP)

std::string Tiger::AlgorithmProvider() const
{
#ifndef CRYPTOPP_DISABLE_TIGER_ASM
# if CRYPTOPP_SSE2_ASM_AVAILABLE
    if (HasSSE2())
        return "SSE2";
# endif
#endif
    return "C++";
}

void Tiger::InitState(HashWordType *state)
{
    state[0] = W64LIT(0x0123456789ABCDEF);
    state[1] = W64LIT(0xFEDCBA9876543210);
    state[2] = W64LIT(0xF096A5B4C3B2E187);
}

void Tiger::TruncatedFinal(byte *hash, size_t size)
{
    CRYPTOPP_ASSERT(hash != NULLPTR);
    ThrowIfInvalidTruncatedSize(size);

    PadLastBlock(56, 0x01);
    CorrectEndianess(m_data, m_data, 56);

    m_data[7] = GetBitCountLo();

    Transform(m_state, m_data);
    CorrectEndianess(m_state, m_state, DigestSize());
    memcpy(hash, m_state, size);

    Restart(); // reinit for next use
}

void Tiger::Transform (word64 *state, const word64 *data)
{
#if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
    if (HasSSE2())
    {
#ifdef __GNUC__
        __asm__ __volatile__
        (
        INTEL_NOPREFIX
        AS_PUSH_IF86(bx)
#else
        AS2( lea edx, [table])
        AS2( mov eax, state)
        AS2( mov esi, data)
#endif
        AS2( movq mm0, [eax])
        AS2( movq mm1, [eax+1*8])
        AS2( movq mm5, mm1)
        AS2( movq mm2, [eax+2*8])
        AS2( movq mm7, [edx+4*2048+0*8])
        AS2( movq mm6, [edx+4*2048+1*8])
        AS2( mov ecx, esp)
        AS2( and esp, 0xfffffff0)
        AS2( sub esp, 8*8)
        AS_PUSH_IF86(cx)

#define SSE2_round(a,b,c,x,mul) \
    AS2( pxor c, [x])\
    AS2( movd ecx, c)\
    AS2( movzx edi, cl)\
    AS2( movq mm3, [edx+0*2048+edi*8])\
    AS2( movzx edi, ch)\
    AS2( movq mm4, [edx+3*2048+edi*8])\
    AS2( shr ecx, 16)\
    AS2( movzx edi, cl)\
    AS2( pxor mm3, [edx+1*2048+edi*8])\
    AS2( movzx edi, ch)\
    AS2( pxor mm4, [edx+2*2048+edi*8])\
    AS3( pextrw ecx, c, 2)\
    AS2( movzx edi, cl)\
    AS2( pxor mm3, [edx+2*2048+edi*8])\
    AS2( movzx edi, ch)\
    AS2( pxor mm4, [edx+1*2048+edi*8])\
    AS3( pextrw ecx, c, 3)\
    AS2( movzx edi, cl)\
    AS2( pxor mm3, [edx+3*2048+edi*8])\
    AS2( psubq a, mm3)\
    AS2( movzx edi, ch)\
    AS2( pxor mm4, [edx+0*2048+edi*8])\
    AS2( paddq b, mm4)\
    SSE2_mul_##mul(b)

#define SSE2_mul_5(b) \
    AS2( movq mm3, b)\
    AS2( psllq b, 2)\
    AS2( paddq b, mm3)

#define SSE2_mul_7(b) \
    AS2( movq mm3, b)\
    AS2( psllq b, 3)\
    AS2( psubq b, mm3)

#define SSE2_mul_9(b) \
    AS2( movq mm3, b)\
    AS2( psllq b, 3)\
    AS2( paddq b, mm3)

#define label2_5 1
#define label2_7 2
#define label2_9 3

#define SSE2_pass(A,B,C,mul,X) \
    AS2( xor ebx, ebx)\
    ASL(mul)\
    SSE2_round(A,B,C,X+0*8+ebx,mul)\
    SSE2_round(B,C,A,X+1*8+ebx,mul)\
    AS2( cmp ebx, 6*8)\
    ASJ( je, label2_##mul, f)\
    SSE2_round(C,A,B,X+2*8+ebx,mul)\
    AS2( add ebx, 3*8)\
    ASJ( jmp, mul, b)\
    ASL(label2_##mul)

#define SSE2_key_schedule(Y,X) \
    AS2( movq mm3, [X+7*8])\
    AS2( pxor mm3, mm6)\
    AS2( movq mm4, [X+0*8])\
    AS2( psubq mm4, mm3)\
    AS2( movq [Y+0*8], mm4)\
    AS2( pxor mm4, [X+1*8])\
    AS2( movq mm3, mm4)\
    AS2( movq [Y+1*8], mm4)\
    AS2( paddq mm4, [X+2*8])\
    AS2( pxor mm3, mm7)\
    AS2( psllq mm3, 19)\
    AS2( movq [Y+2*8], mm4)\
    AS2( pxor mm3, mm4)\
    AS2( movq mm4, [X+3*8])\
    AS2( psubq mm4, mm3)\
    AS2( movq [Y+3*8], mm4)\
    AS2( pxor mm4, [X+4*8])\
    AS2( movq mm3, mm4)\
    AS2( movq [Y+4*8], mm4)\
    AS2( paddq mm4, [X+5*8])\
    AS2( pxor mm3, mm7)\
    AS2( psrlq mm3, 23)\
    AS2( movq [Y+5*8], mm4)\
    AS2( pxor mm3, mm4)\
    AS2( movq mm4, [X+6*8])\
    AS2( psubq mm4, mm3)\
    AS2( movq [Y+6*8], mm4)\
    AS2( pxor mm4, [X+7*8])\
    AS2( movq mm3, mm4)\
    AS2( movq [Y+7*8], mm4)\
    AS2( paddq mm4, [Y+0*8])\
    AS2( pxor mm3, mm7)\
    AS2( psllq mm3, 19)\
    AS2( movq [Y+0*8], mm4)\
    AS2( pxor mm3, mm4)\
    AS2( movq mm4, [Y+1*8])\
    AS2( psubq mm4, mm3)\
    AS2( movq [Y+1*8], mm4)\
    AS2( pxor mm4, [Y+2*8])\
    AS2( movq mm3, mm4)\
    AS2( movq [Y+2*8], mm4)\
    AS2( paddq mm4, [Y+3*8])\
    AS2( pxor mm3, mm7)\
    AS2( psrlq mm3, 23)\
    AS2( movq [Y+3*8], mm4)\
    AS2( pxor mm3, mm4)\
    AS2( movq mm4, [Y+4*8])\
    AS2( psubq mm4, mm3)\
    AS2( movq [Y+4*8], mm4)\
    AS2( pxor mm4, [Y+5*8])\
    AS2( movq [Y+5*8], mm4)\
    AS2( paddq mm4, [Y+6*8])\
    AS2( movq [Y+6*8], mm4)\
    AS2( pxor mm4, [edx+4*2048+2*8])\
    AS2( movq mm3, [Y+7*8])\
    AS2( psubq mm3, mm4)\
    AS2( movq [Y+7*8], mm3)

        SSE2_pass(mm0, mm1, mm2, 5, esi)
        SSE2_key_schedule(esp+4, esi)
        SSE2_pass(mm2, mm0, mm1, 7, esp+4)
        SSE2_key_schedule(esp+4, esp+4)
        SSE2_pass(mm1, mm2, mm0, 9, esp+4)

        AS2( pxor mm0, [eax+0*8])
        AS2( movq [eax+0*8], mm0)
        AS2( psubq mm1, mm5)
        AS2( movq [eax+1*8], mm1)
        AS2( paddq mm2, [eax+2*8])
        AS2( movq [eax+2*8], mm2)

        AS_POP_IF86(sp)
        AS1( emms)

#ifdef __GNUC__
        AS_POP_IF86(bx)
        ATT_PREFIX
            :
            : "a" (state), "S" (data), "d" (table)
            : "%ecx", "%edi", "memory", "cc"
        );
#endif
    }
    else
#endif
    {
        word64 a = state[0];
        word64 b = state[1];
        word64 c = state[2];
        word64 Y[8];

#define t1 (table)
#define t2 (table+256)
#define t3 (table+256*2)
#define t4 (table+256*3)

#define round(a,b,c,x,mul) \
    c ^= x; \
    a -= t1[GETBYTE(c,0)] ^ t2[GETBYTE(c,2)] ^ t3[GETBYTE(c,4)] ^ t4[GETBYTE(c,6)]; \
    b += t4[GETBYTE(c,1)] ^ t3[GETBYTE(c,3)] ^ t2[GETBYTE(c,5)] ^ t1[GETBYTE(c,7)]; \
    b *= mul

#define pass(a,b,c,mul,X) {\
    int i=0;\
    while (true)\
    {\
        round(a,b,c,X[i+0],mul); \
        round(b,c,a,X[i+1],mul); \
        if (i==6)\
            break;\
        round(c,a,b,X[i+2],mul); \
        i+=3;\
    }}

#define key_schedule(Y,X) \
    Y[0] = X[0] - (X[7]^W64LIT(0xA5A5A5A5A5A5A5A5)); \
    Y[1] = X[1] ^ Y[0]; \
    Y[2] = X[2] + Y[1]; \
    Y[3] = X[3] - (Y[2] ^ ((~Y[1])<<19)); \
    Y[4] = X[4] ^ Y[3]; \
    Y[5] = X[5] + Y[4]; \
    Y[6] = X[6] - (Y[5] ^ ((~Y[4])>>23)); \
    Y[7] = X[7] ^ Y[6]; \
    Y[0] += Y[7]; \
    Y[1] -= Y[0] ^ ((~Y[7])<<19); \
    Y[2] ^= Y[1]; \
    Y[3] += Y[2]; \
    Y[4] -= Y[3] ^ ((~Y[2])>>23); \
    Y[5] ^= Y[4]; \
    Y[6] += Y[5]; \
    Y[7] -= Y[6] ^ W64LIT(0x0123456789ABCDEF)

        pass(a,b,c,5,data);
        key_schedule(Y,data);
        pass(c,a,b,7,Y);
        key_schedule(Y,Y);
        pass(b,c,a,9,Y);

        state[0] = a ^ state[0];
        state[1] = b - state[1];
        state[2] = c + state[2];
    }
}

NAMESPACE_END
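For context, here is a minimal usage sketch (not part of this file) showing how the Tiger class implemented above can be driven through Crypto++'s generic HashTransformation interface; it assumes a recent Crypto++ in which byte lives in the CryptoPP namespace.

```cpp
#include "tiger.h"
#include "filters.h"
#include "hex.h"

#include <iostream>
#include <string>

int main()
{
    const std::string message = "abc";
    CryptoPP::Tiger hash;

    // One-shot digest calculation; Update()/Final() would work equally well.
    CryptoPP::byte digest[CryptoPP::Tiger::DIGESTSIZE];
    hash.CalculateDigest(digest,
        reinterpret_cast<const CryptoPP::byte*>(message.data()), message.size());

    // Hex-encode the 24-byte digest for display.
    std::string encoded;
    CryptoPP::StringSource ss(digest, sizeof(digest), true,
        new CryptoPP::HexEncoder(new CryptoPP::StringSink(encoded)));

    std::cout << "Tiger(\"" << message << "\") = " << encoded << std::endl;
    return 0;
}
```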