Rename VecPolyMultiplyLE to VecIntelMultiply (PR #908)

The LE multiply functions present their results like Intel's _mm_clmulepi64_si128, so they are renamed to VecIntelMultiply.
This commit is contained in:
Jeffrey Walton 2019-10-26 22:57:58 -04:00 committed by GitHub
parent fa39314b7a
commit 183fba44bf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 37 additions and 37 deletions

View File

@ -190,8 +190,8 @@ bool CPU_ProbePMULL()
0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
const uint32x4_p a2=VecLoad(wa2), b2=VecLoad(wb2);
const uint64x2_p r1 = VecPolyMultiply00LE(a1, b1);
const uint64x2_p r2 = VecPolyMultiply11LE((uint64x2_p)a2, (uint64x2_p)b2);
const uint64x2_p r1 = VecIntelMultiply00(a1, b1);
const uint64x2_p r2 = VecIntelMultiply11((uint64x2_p)a2, (uint64x2_p)b2);
const uint64_t wc1[]={W64LIT(0x5300530053005300), W64LIT(0x5300530053005300)},
wc2[]={W64LIT(0x6c006c006c006c00), W64LIT(0x6c006c006c006c00)};
@ -575,9 +575,9 @@ uint64x2_p GCM_Reduce_VMULL(uint64x2_p c0, uint64x2_p c1, uint64x2_p c2, uint64x
const uint64x2_p m1 = {1,1}, m63 = {63,63};
c1 = VecXor(c1, VecShiftRightOctet<8>(c0));
c1 = VecXor(c1, VecPolyMultiply10LE(c0, r));
c1 = VecXor(c1, VecIntelMultiply10(c0, r));
c0 = VecXor(c1, VecShiftLeftOctet<8>(c0));
c0 = VecPolyMultiply00LE(vec_sl(c0, m1), r);
c0 = VecIntelMultiply00(vec_sl(c0, m1), r);
c2 = VecXor(c2, c0);
c2 = VecXor(c2, VecShiftLeftOctet<8>(c1));
c1 = vec_sr(vec_mergeh(c1, c2), m63);
@ -588,9 +588,9 @@ uint64x2_p GCM_Reduce_VMULL(uint64x2_p c0, uint64x2_p c1, uint64x2_p c2, uint64x
inline uint64x2_p GCM_Multiply_VMULL(uint64x2_p x, uint64x2_p h, uint64x2_p r)
{
const uint64x2_p c0 = VecPolyMultiply00LE(x, h);
const uint64x2_p c1 = VecXor(VecPolyMultiply01LE(x, h), VecPolyMultiply10LE(x, h));
const uint64x2_p c2 = VecPolyMultiply11LE(x, h);
const uint64x2_p c0 = VecIntelMultiply00(x, h);
const uint64x2_p c1 = VecXor(VecIntelMultiply01(x, h), VecIntelMultiply10(x, h));
const uint64x2_p c2 = VecIntelMultiply11(x, h);
return GCM_Reduce_VMULL(c0, c1, c2, r);
}
@ -685,35 +685,35 @@ size_t GCM_AuthenticateBlocks_VMULL(const byte *data, size_t len, const byte *mt
{
d1 = LoadBuffer2(data);
d1 = VecXor(d1, x);
c0 = VecXor(c0, VecPolyMultiply00LE(d1, h0));
c2 = VecXor(c2, VecPolyMultiply01LE(d1, h1));
c0 = VecXor(c0, VecIntelMultiply00(d1, h0));
c2 = VecXor(c2, VecIntelMultiply01(d1, h1));
d1 = VecXor(d1, SwapWords(d1));
c1 = VecXor(c1, VecPolyMultiply00LE(d1, h2));
c1 = VecXor(c1, VecIntelMultiply00(d1, h2));
break;
}
d1 = LoadBuffer1(data+(s-i)*16-8);
c0 = VecXor(c0, VecPolyMultiply01LE(d2, h0));
c2 = VecXor(c2, VecPolyMultiply01LE(d1, h1));
c0 = VecXor(c0, VecIntelMultiply01(d2, h0));
c2 = VecXor(c2, VecIntelMultiply01(d1, h1));
d2 = VecXor(d2, d1);
c1 = VecXor(c1, VecPolyMultiply01LE(d2, h2));
c1 = VecXor(c1, VecIntelMultiply01(d2, h2));
if (++i == s)
{
d1 = LoadBuffer2(data);
d1 = VecXor(d1, x);
c0 = VecXor(c0, VecPolyMultiply10LE(d1, h0));
c2 = VecXor(c2, VecPolyMultiply11LE(d1, h1));
c0 = VecXor(c0, VecIntelMultiply10(d1, h0));
c2 = VecXor(c2, VecIntelMultiply11(d1, h1));
d1 = VecXor(d1, SwapWords(d1));
c1 = VecXor(c1, VecPolyMultiply10LE(d1, h2));
c1 = VecXor(c1, VecIntelMultiply10(d1, h2));
break;
}
d2 = LoadBuffer2(data+(s-i)*16-8);
c0 = VecXor(c0, VecPolyMultiply10LE(d1, h0));
c2 = VecXor(c2, VecPolyMultiply10LE(d2, h1));
c0 = VecXor(c0, VecIntelMultiply10(d1, h0));
c2 = VecXor(c2, VecIntelMultiply10(d2, h1));
d1 = VecXor(d1, d2);
c1 = VecXor(c1, VecPolyMultiply10LE(d1, h2));
c1 = VecXor(c1, VecIntelMultiply10(d1, h2));
}
data += s*16;
len -= s*16;

View File

@ -325,8 +325,8 @@ using CryptoPP::VecMergeHigh;
using CryptoPP::VecShiftLeft;
using CryptoPP::VecShiftRight;
using CryptoPP::VecPolyMultiply00LE;
using CryptoPP::VecPolyMultiply11LE;
using CryptoPP::VecIntelMultiply00;
using CryptoPP::VecIntelMultiply11;
// c1c0 = a * b
inline void
@ -335,13 +335,13 @@ F2N_Multiply_128x128_POWER8(uint64x2_p& c1, uint64x2_p& c0, const uint64x2_p& a,
uint64x2_p t1, t2;
const uint64x2_p z0={0};
c0 = VecPolyMultiply00LE(a, b);
c1 = VecPolyMultiply11LE(a, b);
c0 = VecIntelMultiply00(a, b);
c1 = VecIntelMultiply11(a, b);
t1 = VecMergeLow(a, a);
t1 = VecXor(a, t1);
t2 = VecMergeLow(b, b);
t2 = VecXor(b, t2);
t1 = VecPolyMultiply00LE(t1, t2);
t1 = VecIntelMultiply00(t1, t2);
t1 = VecXor(c0, t1);
t1 = VecXor(c1, t1);
t2 = t1;
@ -380,10 +380,10 @@ inline void
F2N_Square_256_POWER8(uint64x2_p& c3, uint64x2_p& c2, uint64x2_p& c1,
uint64x2_p& c0, const uint64x2_p& a1, const uint64x2_p& a0)
{
c0 = VecPolyMultiply00LE(a0, a0);
c1 = VecPolyMultiply11LE(a0, a0);
c2 = VecPolyMultiply00LE(a1, a1);
c3 = VecPolyMultiply11LE(a1, a1);
c0 = VecIntelMultiply00(a0, a0);
c1 = VecIntelMultiply11(a0, a0);
c2 = VecIntelMultiply00(a1, a1);
c3 = VecIntelMultiply11(a1, a1);
}
// x = (x << n), z = 0

View File

@ -73,7 +73,7 @@ bool CPU_ProbePower7()
result = (0 == std::memcmp(b1+3, b2+1, 16));
#else
result = false;
#endif
#endif
}
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);

View File

@ -1437,7 +1437,7 @@ inline uint64x2_p VecPolyMultiply(const uint64x2_p& a, const uint64x2_p& b)
/// \param a the first term
/// \param b the second term
/// \returns vector product
/// \details VecPolyMultiply00LE() performs polynomial multiplication and presents
/// \details VecIntelMultiply00() performs polynomial multiplication and presents
/// the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x00)</tt>.
/// The <tt>0x00</tt> indicates the low 64-bits of <tt>a</tt> and <tt>b</tt>
/// are multiplied.
@ -1446,7 +1446,7 @@ inline uint64x2_p VecPolyMultiply(const uint64x2_p& a, const uint64x2_p& b)
/// \par Wraps
/// __vpmsumd, __builtin_altivec_crypto_vpmsumd and __builtin_crypto_vpmsumd.
/// \since Crypto++ 8.0
inline uint64x2_p VecPolyMultiply00LE(const uint64x2_p& a, const uint64x2_p& b)
inline uint64x2_p VecIntelMultiply00(const uint64x2_p& a, const uint64x2_p& b)
{
#if (CRYPTOPP_BIG_ENDIAN)
return VecSwapWords(VecPolyMultiply(VecGetHigh(a), VecGetHigh(b)));
@ -1459,7 +1459,7 @@ inline uint64x2_p VecPolyMultiply00LE(const uint64x2_p& a, const uint64x2_p& b)
/// \param a the first term
/// \param b the second term
/// \returns vector product
/// \details VecPolyMultiply01LE performs() polynomial multiplication and presents
/// \details VecIntelMultiply01() performs polynomial multiplication and presents
/// the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x01)</tt>.
/// The <tt>0x01</tt> indicates the low 64-bits of <tt>a</tt> and high
/// 64-bits of <tt>b</tt> are multiplied.
@ -1468,7 +1468,7 @@ inline uint64x2_p VecPolyMultiply00LE(const uint64x2_p& a, const uint64x2_p& b)
/// \par Wraps
/// __vpmsumd, __builtin_altivec_crypto_vpmsumd and __builtin_crypto_vpmsumd.
/// \since Crypto++ 8.0
inline uint64x2_p VecPolyMultiply01LE(const uint64x2_p& a, const uint64x2_p& b)
inline uint64x2_p VecIntelMultiply01(const uint64x2_p& a, const uint64x2_p& b)
{
#if (CRYPTOPP_BIG_ENDIAN)
return VecSwapWords(VecPolyMultiply(a, VecGetHigh(b)));
@ -1481,7 +1481,7 @@ inline uint64x2_p VecPolyMultiply01LE(const uint64x2_p& a, const uint64x2_p& b)
/// \param a the first term
/// \param b the second term
/// \returns vector product
/// \details VecPolyMultiply10LE() performs polynomial multiplication and presents
/// \details VecIntelMultiply10() performs polynomial multiplication and presents
/// the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x10)</tt>.
/// The <tt>0x10</tt> indicates the high 64-bits of <tt>a</tt> and low
/// 64-bits of <tt>b</tt> are multiplied.
@ -1490,7 +1490,7 @@ inline uint64x2_p VecPolyMultiply01LE(const uint64x2_p& a, const uint64x2_p& b)
/// \par Wraps
/// __vpmsumd, __builtin_altivec_crypto_vpmsumd and __builtin_crypto_vpmsumd.
/// \since Crypto++ 8.0
inline uint64x2_p VecPolyMultiply10LE(const uint64x2_p& a, const uint64x2_p& b)
inline uint64x2_p VecIntelMultiply10(const uint64x2_p& a, const uint64x2_p& b)
{
#if (CRYPTOPP_BIG_ENDIAN)
return VecSwapWords(VecPolyMultiply(VecGetHigh(a), b));
@ -1503,7 +1503,7 @@ inline uint64x2_p VecPolyMultiply10LE(const uint64x2_p& a, const uint64x2_p& b)
/// \param a the first term
/// \param b the second term
/// \returns vector product
/// \details VecPolyMultiply11LE() performs polynomial multiplication and presents
/// \details VecIntelMultiply11() performs polynomial multiplication and presents
/// the result like Intel's <tt>c = _mm_clmulepi64_si128(a, b, 0x11)</tt>.
/// The <tt>0x11</tt> indicates the high 64-bits of <tt>a</tt> and <tt>b</tt>
/// are multiplied.
@ -1512,7 +1512,7 @@ inline uint64x2_p VecPolyMultiply10LE(const uint64x2_p& a, const uint64x2_p& b)
/// \par Wraps
/// __vpmsumd, __builtin_altivec_crypto_vpmsumd and __builtin_crypto_vpmsumd.
/// \since Crypto++ 8.0
inline uint64x2_p VecPolyMultiply11LE(const uint64x2_p& a, const uint64x2_p& b)
inline uint64x2_p VecIntelMultiply11(const uint64x2_p& a, const uint64x2_p& b)
{
#if (CRYPTOPP_BIG_ENDIAN)
return VecSwapWords(VecPolyMultiply(VecGetLow(a), b));