mirror of
https://github.com/shadps4-emu/ext-cryptopp.git
synced 2024-11-24 02:19:41 +00:00
1bbbfb6b75
Commit afbd3e60f6 effectively treated a symptom and not the underlying problem. The problem was that linkers on 32-bit systems ignore the CRYPTOPP_ALIGN_DATA(16) passed down by the compiler and align to 8 bytes or less. We have to use Wei's original code in some places. It is not a bad thing, but the bit fiddling is something we would like to contain a little more by depending more on language or platform features. This commit keeps the original changes which improve partial specializations, but fixes 32-bit linker behavior by effectively reverting afbd3e60f6 and e054d36dc8. We also add more comments so the next person understands why things are done the way they are.
283 lines
6.4 KiB
C++
283 lines
6.4 KiB
C++
// tiger.cpp - originally written and placed in the public domain by Wei Dai
|
|
|
|
#include "pch.h"
|
|
#include "config.h"
|
|
|
|
#include "tiger.h"
|
|
#include "misc.h"
|
|
#include "cpu.h"
|
|
|
|
// Build-time opt-out: when CRYPTOPP_DISABLE_TIGER_ASM is defined, drop every
// assembly-availability flag so the rest of this file only sees the C++ path.
#ifdef CRYPTOPP_DISABLE_TIGER_ASM
# undef CRYPTOPP_SSE2_ASM_AVAILABLE
# undef CRYPTOPP_X64_ASM_AVAILABLE
# undef CRYPTOPP_X32_ASM_AVAILABLE
# undef CRYPTOPP_X86_ASM_AVAILABLE
#endif
|
|
|
|
NAMESPACE_BEGIN(CryptoPP)
|
|
|
|
std::string Tiger::AlgorithmProvider() const
|
|
{
|
|
#ifndef CRYPTOPP_DISABLE_TIGER_ASM
|
|
# if CRYPTOPP_SSE2_ASM_AVAILABLE
|
|
if (HasSSE2())
|
|
return "SSE2";
|
|
# endif
|
|
#endif
|
|
return "C++";
|
|
}
|
|
|
|
// Loads the three 64-bit initial chaining values into state[0..2].
// The caller must supply room for at least three words.
void Tiger::InitState(HashWordType *state)
{
	static const HashWordType s_initialState[3] = {
		W64LIT(0x0123456789ABCDEF),
		W64LIT(0xFEDCBA9876543210),
		W64LIT(0xF096A5B4C3B2E187)
	};

	for (unsigned int idx = 0; idx < 3; ++idx)
		state[idx] = s_initialState[idx];
}
|
|
|
|
// Finishes the hash computation and writes the first `size` bytes of the
// digest to `hash`, then resets the object so it can be reused.
//
// hash - destination buffer; must not be null (asserted).
// size - number of digest bytes requested; validated by
//        ThrowIfInvalidTruncatedSize before any work is done.
void Tiger::TruncatedFinal(byte *hash, size_t size)
{
	CRYPTOPP_ASSERT(hash != NULLPTR);
	ThrowIfInvalidTruncatedSize(size);

	// The last block is padded (first pad byte 0x01) up to this many bytes;
	// the remaining word of the block, m_data[7], receives the bit count.
	const unsigned int padBoundary = 56;

	PadLastBlock(padBoundary, 0x01);
	CorrectEndianess(m_data, m_data, padBoundary);
	m_data[7] = GetBitCountLo();

	// Compress the final block, then put the state into output byte order.
	Transform(m_state, m_data);
	CorrectEndianess(m_state, m_state, DigestSize());

	// Only the requested prefix of the digest is handed back.
	memcpy(hash, m_state, size);

	// Reinitialize for the next message.
	Restart();
}
|
|
|
|
// Tiger compression function: mixes one 8-word block into the 3-word state.
//
// state - three 64-bit chaining words, updated in place.
// data  - eight 64-bit message words (read only).
//
// Two implementations are provided:
//  * an MMX/SSE2 inline-assembly path, compiled only when
//    CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86 and taken only when
//    HasSSE2() reports support at runtime;
//  * a portable C++ path used in every other case.
//
// Both run three passes of eight rounds (multipliers 5, 7, 9) with a key
// schedule between passes, then feed the original state forward
// (xor / subtract / add). The helper macros defined inside the function are
// #undef'd after it so generic names such as `round` and `pass` do not leak
// into whatever is compiled after this function.
void Tiger::Transform (word64 *state, const word64 *data)
{
#if CRYPTOPP_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86
	if (HasSSE2())
	{
		// Register usage in the assembly below:
		//   eax = state, esi = data, edx = table
		//   mm0..mm2 = working copies of state[0..2]
		//   mm5 = original state[1], kept for the final feed-forward
		//   mm6, mm7 = 64-bit constants stored right after the four
		//              256-entry tables (byte offset 4*2048); they play the
		//              role of the constants in the portable key_schedule
		//              (presumably 0xA5A5... and all-ones -- confirm in the
		//              table definition).
#ifdef __GNUC__
		__asm__ __volatile__
		(
		INTEL_NOPREFIX
		AS_PUSH_IF86(bx)
#else
		AS2(	lea		edx, [table])
		AS2(	mov		eax, state)
		AS2(	mov		esi, data)
#endif
		AS2(	movq	mm0, [eax])
		AS2(	movq	mm1, [eax+1*8])
		AS2(	movq	mm5, mm1)
		AS2(	movq	mm2, [eax+2*8])
		AS2(	movq	mm7, [edx+4*2048+0*8])
		AS2(	movq	mm6, [edx+4*2048+1*8])
		// Remember esp in ecx, align the stack down to 16 bytes and reserve
		// 8*8 bytes for the key-schedule buffer; the saved esp is then
		// pushed, so the buffer is addressed as esp+4 below.
		AS2(	mov		ecx, esp)
		AS2(	and		esp, 0xfffffff0)
		AS2(	sub		esp, 8*8)
		AS_PUSH_IF86(cx)

// One round: c ^= [x]; a -= xor of table lookups on even bytes of c;
// b += xor of table lookups on odd bytes of c; b *= mul.
// mm3 accumulates the even-byte lookups, mm4 the odd-byte lookups.
#define SSE2_round(a,b,c,x,mul) \
		AS2(	pxor	c, [x])\
		AS2(	movd	ecx, c)\
		AS2(	movzx	edi, cl)\
		AS2(	movq	mm3, [edx+0*2048+edi*8])\
		AS2(	movzx	edi, ch)\
		AS2(	movq	mm4, [edx+3*2048+edi*8])\
		AS2(	shr		ecx, 16)\
		AS2(	movzx	edi, cl)\
		AS2(	pxor	mm3, [edx+1*2048+edi*8])\
		AS2(	movzx	edi, ch)\
		AS2(	pxor	mm4, [edx+2*2048+edi*8])\
		AS3(	pextrw	ecx, c, 2)\
		AS2(	movzx	edi, cl)\
		AS2(	pxor	mm3, [edx+2*2048+edi*8])\
		AS2(	movzx	edi, ch)\
		AS2(	pxor	mm4, [edx+1*2048+edi*8])\
		AS3(	pextrw	ecx, c, 3)\
		AS2(	movzx	edi, cl)\
		AS2(	pxor	mm3, [edx+3*2048+edi*8])\
		AS2(	psubq	a, mm3)\
		AS2(	movzx	edi, ch)\
		AS2(	pxor	mm4, [edx+0*2048+edi*8])\
		AS2(	paddq	b, mm4)\
		SSE2_mul_##mul(b)

// b *= 5, computed as (b << 2) + b.
#define SSE2_mul_5(b)	\
		AS2(	movq	mm3, b)\
		AS2(	psllq	b, 2)\
		AS2(	paddq	b, mm3)

// b *= 7, computed as (b << 3) - b.
#define SSE2_mul_7(b)	\
		AS2(	movq	mm3, b)\
		AS2(	psllq	b, 3)\
		AS2(	psubq	b, mm3)

// b *= 9, computed as (b << 3) + b.
#define SSE2_mul_9(b)	\
		AS2(	movq	mm3, b)\
		AS2(	psllq	b, 3)\
		AS2(	paddq	b, mm3)

// Numeric local labels used as the exit target of each pass.
#define label2_5 1
#define label2_7 2
#define label2_9 3

// One pass of eight rounds over X[0..7]. ebx is the byte offset into X and
// advances by 3*8 per loop iteration; the loop exits after the second round
// of the iteration in which ebx == 6*8 (rounds 6 and 7).
#define SSE2_pass(A,B,C,mul,X)	\
		AS2(	xor		ebx, ebx)\
		ASL(mul)\
		SSE2_round(A,B,C,X+0*8+ebx,mul)\
		SSE2_round(B,C,A,X+1*8+ebx,mul)\
		AS2(	cmp		ebx, 6*8)\
		ASJ(	je,		label2_##mul, f)\
		SSE2_round(C,A,B,X+2*8+ebx,mul)\
		AS2(	add		ebx, 3*8)\
		ASJ(	jmp,	mul, b)\
		ASL(label2_##mul)

// Derives the next eight key words Y[0..7] from X[0..7]; same sequence of
// operations as the portable key_schedule macro further down, with
// "pxor ..., mm7" standing in for the bitwise complement there.
#define SSE2_key_schedule(Y,X) \
		AS2(	movq	mm3, [X+7*8])\
		AS2(	pxor	mm3, mm6)\
		AS2(	movq	mm4, [X+0*8])\
		AS2(	psubq	mm4, mm3)\
		AS2(	movq	[Y+0*8], mm4)\
		AS2(	pxor	mm4, [X+1*8])\
		AS2(	movq	mm3, mm4)\
		AS2(	movq	[Y+1*8], mm4)\
		AS2(	paddq	mm4, [X+2*8])\
		AS2(	pxor	mm3, mm7)\
		AS2(	psllq	mm3, 19)\
		AS2(	movq	[Y+2*8], mm4)\
		AS2(	pxor	mm3, mm4)\
		AS2(	movq	mm4, [X+3*8])\
		AS2(	psubq	mm4, mm3)\
		AS2(	movq	[Y+3*8], mm4)\
		AS2(	pxor	mm4, [X+4*8])\
		AS2(	movq	mm3, mm4)\
		AS2(	movq	[Y+4*8], mm4)\
		AS2(	paddq	mm4, [X+5*8])\
		AS2(	pxor	mm3, mm7)\
		AS2(	psrlq	mm3, 23)\
		AS2(	movq	[Y+5*8], mm4)\
		AS2(	pxor	mm3, mm4)\
		AS2(	movq	mm4, [X+6*8])\
		AS2(	psubq	mm4, mm3)\
		AS2(	movq	[Y+6*8], mm4)\
		AS2(	pxor	mm4, [X+7*8])\
		AS2(	movq	mm3, mm4)\
		AS2(	movq	[Y+7*8], mm4)\
		AS2(	paddq	mm4, [Y+0*8])\
		AS2(	pxor	mm3, mm7)\
		AS2(	psllq	mm3, 19)\
		AS2(	movq	[Y+0*8], mm4)\
		AS2(	pxor	mm3, mm4)\
		AS2(	movq	mm4, [Y+1*8])\
		AS2(	psubq	mm4, mm3)\
		AS2(	movq	[Y+1*8], mm4)\
		AS2(	pxor	mm4, [Y+2*8])\
		AS2(	movq	mm3, mm4)\
		AS2(	movq	[Y+2*8], mm4)\
		AS2(	paddq	mm4, [Y+3*8])\
		AS2(	pxor	mm3, mm7)\
		AS2(	psrlq	mm3, 23)\
		AS2(	movq	[Y+3*8], mm4)\
		AS2(	pxor	mm3, mm4)\
		AS2(	movq	mm4, [Y+4*8])\
		AS2(	psubq	mm4, mm3)\
		AS2(	movq	[Y+4*8], mm4)\
		AS2(	pxor	mm4, [Y+5*8])\
		AS2(	movq	[Y+5*8], mm4)\
		AS2(	paddq	mm4, [Y+6*8])\
		AS2(	movq	[Y+6*8], mm4)\
		AS2(	pxor	mm4, [edx+4*2048+2*8])\
		AS2(	movq	mm3, [Y+7*8])\
		AS2(	psubq	mm3, mm4)\
		AS2(	movq	[Y+7*8], mm3)

		// Three passes; the register roles rotate between passes, and the
		// second and third passes read the scheduled key from the stack.
		SSE2_pass(mm0, mm1, mm2, 5, esi)
		SSE2_key_schedule(esp+4, esi)
		SSE2_pass(mm2, mm0, mm1, 7, esp+4)
		SSE2_key_schedule(esp+4, esp+4)
		SSE2_pass(mm1, mm2, mm0, 9, esp+4)

		// Feed-forward: state[0] ^= a, state[1] = b - old state[1],
		// state[2] += c.
		AS2(	pxor	mm0, [eax+0*8])
		AS2(	movq	[eax+0*8], mm0)
		AS2(	psubq	mm1, mm5)
		AS2(	movq	[eax+1*8], mm1)
		AS2(	paddq	mm2, [eax+2*8])
		AS2(	movq	[eax+2*8], mm2)

		// Restore the stack pointer saved before alignment, then leave
		// MMX state so later floating-point code is unaffected.
		AS_POP_IF86(sp)
		AS1(	emms)

#ifdef __GNUC__
		AS_POP_IF86(bx)
		ATT_PREFIX
			:
			: "a" (state), "S" (data), "d" (table)
			: "%ecx", "%edi", "memory", "cc"
		);
#endif
	}
	else
#endif
	{
		// Portable implementation.
		word64 a = state[0];
		word64 b = state[1];
		word64 c = state[2];
		word64 Y[8];

// The four 256-entry lookup tables are stored back to back in `table`.
#define t1 (table)
#define t2 (table+256)
#define t3 (table+256*2)
#define t4 (table+256*3)

// One round: even bytes of c index t1..t4 and are subtracted from a;
// odd bytes index t4..t1 and are added to b; b is then scaled by mul.
#define round(a,b,c,x,mul) \
	c ^= x; \
	a -= t1[GETBYTE(c,0)] ^ t2[GETBYTE(c,2)] ^ t3[GETBYTE(c,4)] ^ t4[GETBYTE(c,6)]; \
	b += t4[GETBYTE(c,1)] ^ t3[GETBYTE(c,3)] ^ t2[GETBYTE(c,5)] ^ t1[GETBYTE(c,7)]; \
	b *= mul

// One pass: eight rounds over X[0..7], three per loop iteration with the
// roles of a, b, c rotating; the loop stops after the second round of the
// iteration that starts at i == 6.
#define pass(a,b,c,mul,X) {\
	int i=0;\
	while (true)\
	{\
		round(a,b,c,X[i+0],mul); \
		round(b,c,a,X[i+1],mul); \
		if (i==6)\
			break;\
		round(c,a,b,X[i+2],mul); \
		i+=3;\
	}}

// Derives the next eight key words Y from X (Y and X may be the same array).
#define key_schedule(Y,X) \
	Y[0] = X[0] - (X[7]^W64LIT(0xA5A5A5A5A5A5A5A5)); \
	Y[1] = X[1] ^ Y[0]; \
	Y[2] = X[2] + Y[1]; \
	Y[3] = X[3] - (Y[2] ^ ((~Y[1])<<19)); \
	Y[4] = X[4] ^ Y[3]; \
	Y[5] = X[5] + Y[4]; \
	Y[6] = X[6] - (Y[5] ^ ((~Y[4])>>23)); \
	Y[7] = X[7] ^ Y[6]; \
	Y[0] += Y[7]; \
	Y[1] -= Y[0] ^ ((~Y[7])<<19); \
	Y[2] ^= Y[1]; \
	Y[3] += Y[2]; \
	Y[4] -= Y[3] ^ ((~Y[2])>>23); \
	Y[5] ^= Y[4]; \
	Y[6] += Y[5]; \
	Y[7] -= Y[6] ^ W64LIT(0x0123456789ABCDEF)

		pass(a,b,c,5,data);
		key_schedule(Y,data);
		pass(c,a,b,7,Y);
		key_schedule(Y,Y);
		pass(b,c,a,9,Y);

		// Feed-forward of the incoming chaining value.
		state[0] = a ^ state[0];
		state[1] = b - state[1];
		state[2] = c + state[2];
	}
}

// Keep the function-local helper macros from leaking past Transform.
// (#undef of a name that was never defined is harmless, so the SSE2 names
// are listed unconditionally.)
#undef SSE2_round
#undef SSE2_mul_5
#undef SSE2_mul_7
#undef SSE2_mul_9
#undef label2_5
#undef label2_7
#undef label2_9
#undef SSE2_pass
#undef SSE2_key_schedule
#undef t1
#undef t2
#undef t3
#undef t4
#undef round
#undef pass
#undef key_schedule
|
|
|
|
NAMESPACE_END
|