mirror of
https://github.com/shadps4-emu/ext-cryptopp.git
synced 2024-11-27 03:40:22 +00:00
Add Octet suffix for vec_sld
We need to make room for packed shifts and rotates
This commit is contained in:
parent
fa8e692d84
commit
505c58acc1
10
gcm_simd.cpp
10
gcm_simd.cpp
@ -173,7 +173,7 @@ using CryptoPP::uint32x4_p;
|
||||
using CryptoPP::uint64x2_p;
|
||||
using CryptoPP::VectorGetLow;
|
||||
using CryptoPP::VectorGetHigh;
|
||||
using CryptoPP::VectorRotateLeft;
|
||||
using CryptoPP::VectorRotateLeftOctet;
|
||||
|
||||
// POWER8 GCM mode is confusing. The algorithm is reflected so
|
||||
// nearly everything we do is reversed for a little-endian system,
|
||||
@ -192,7 +192,7 @@ using CryptoPP::VectorRotateLeft;
|
||||
inline uint64x2_p VMULL2LE(const uint64x2_p& val)
|
||||
{
|
||||
#if (CRYPTOPP_BIG_ENDIAN)
|
||||
return VectorRotateLeft<8>(val);
|
||||
return VectorRotateLeftOctet<8>(val);
|
||||
#else
|
||||
return val;
|
||||
#endif
|
||||
@ -755,10 +755,10 @@ uint64x2_p GCM_Reduce_VMULL(uint64x2_p c0, uint64x2_p c1, uint64x2_p c2, uint64x
|
||||
|
||||
c1 = VectorXor(c1, VectorShiftRight<8>(c0));
|
||||
c1 = VectorXor(c1, VMULL_10LE(c0, r));
|
||||
c0 = VectorXor(c1, VectorShiftLeft<8>(c0));
|
||||
c0 = VectorXor(c1, VectorShiftLeftOctet<8>(c0));
|
||||
c0 = VMULL_00LE(vec_sl(c0, m1), r);
|
||||
c2 = VectorXor(c2, c0);
|
||||
c2 = VectorXor(c2, VectorShiftLeft<8>(c1));
|
||||
c2 = VectorXor(c2, VectorShiftLeftOctet<8>(c1));
|
||||
c1 = vec_sr(vec_mergeh(c1, c2), m63);
|
||||
c2 = vec_sl(c2, m1);
|
||||
|
||||
@ -820,7 +820,7 @@ void GCM_SetKeyWithoutResync_VMULL(const byte *hashKey, byte *mulTable, unsigned
|
||||
template <class T>
|
||||
inline T SwapWords(const T& data)
|
||||
{
|
||||
return (T)VectorRotateLeft<8>(data);
|
||||
return (T)VectorRotateLeftOctet<8>(data);
|
||||
}
|
||||
|
||||
inline uint64x2_p LoadBuffer1(const byte *dataBuffer)
|
||||
|
20
ppc_simd.h
20
ppc_simd.h
@ -172,22 +172,22 @@ inline T1 VectorSub(const T1& vec1, const T2& vec2)
|
||||
/// \tparam T vector type
|
||||
/// \param vec the vector
|
||||
/// \returns vector
|
||||
/// \details VectorShiftLeft() returns a new vector after shifting the
|
||||
/// \details VectorShiftLeftOctet() returns a new vector after shifting the
|
||||
/// concatenation of the zero vector and the source vector by the specified
|
||||
/// number of bytes. The return vector is the same type as vec.
|
||||
/// \details On big endian machines VectorShiftLeft() is <tt>vec_sld(a, z,
|
||||
/// c)</tt>. On little endian machines VectorShiftLeft() is translated to
|
||||
/// \details On big endian machines VectorShiftLeftOctet() is <tt>vec_sld(a, z,
|
||||
/// c)</tt>. On little endian machines VectorShiftLeftOctet() is translated to
|
||||
/// <tt>vec_sld(z, a, 16-c)</tt>. You should always call the function as
|
||||
/// if on a big endian machine as shown below.
|
||||
/// <pre>
|
||||
/// uint8x16_p r1 = VectorLoad(ptr);
|
||||
/// uint8x16_p r5 = VectorShiftLeft<12>(r1);
|
||||
/// uint8x16_p r5 = VectorShiftLeftOctet<12>(r1);
|
||||
/// </pre>
|
||||
/// \sa <A HREF="https://stackoverflow.com/q/46341923/608639">Is vec_sld
|
||||
/// endian sensitive?</A> on Stack Overflow
|
||||
/// \since Crypto++ 6.0
|
||||
template <unsigned int C, class T>
|
||||
inline T VectorShiftLeft(const T& vec)
|
||||
inline T VectorShiftLeftOctet(const T& vec)
|
||||
{
|
||||
const T zero = {0};
|
||||
if (C >= 16)
|
||||
@ -258,14 +258,14 @@ inline T VectorShiftRight(const T& vec)
|
||||
/// \tparam T vector type
|
||||
/// \param vec the vector
|
||||
/// \returns vector
|
||||
/// \details VectorRotateLeft() returns a new vector after rotating the
|
||||
/// \details VectorRotateLeftOctet() returns a new vector after rotating the
|
||||
/// concatenation of the source vector with itself by the specified
|
||||
/// number of bytes. The return vector is the same type as vec.
|
||||
/// \sa <A HREF="https://stackoverflow.com/q/46341923/608639">Is vec_sld
|
||||
/// endian sensitive?</A> on Stack Overflow
|
||||
/// \since Crypto++ 6.0
|
||||
template <unsigned int C, class T>
|
||||
inline T VectorRotateLeft(const T& vec)
|
||||
inline T VectorRotateLeftOctet(const T& vec)
|
||||
{
|
||||
enum { R = C&0xf };
|
||||
#if (CRYPTOPP_BIG_ENDIAN)
|
||||
@ -280,14 +280,14 @@ inline T VectorRotateLeft(const T& vec)
|
||||
/// \tparam T vector type
|
||||
/// \param vec the vector
|
||||
/// \returns vector
|
||||
/// \details VectorRotateRight() returns a new vector after rotating the
|
||||
/// \details VectorRotateRightOctet() returns a new vector after rotating the
|
||||
/// concatenation of the source vector with itself by the specified
|
||||
/// number of bytes. The return vector is the same type as vec.
|
||||
/// \sa <A HREF="https://stackoverflow.com/q/46341923/608639">Is vec_sld
|
||||
/// endian sensitive?</A> on Stack Overflow
|
||||
/// \since Crypto++ 6.0
|
||||
template <unsigned int C, class T>
|
||||
inline T VectorRotateRight(const T& vec)
|
||||
inline T VectorRotateRightOctet(const T& vec)
|
||||
{
|
||||
enum { R = C&0xf };
|
||||
#if (CRYPTOPP_BIG_ENDIAN)
|
||||
@ -322,7 +322,7 @@ inline T VectorGetLow(const T& val)
|
||||
//const T zero = {0};
|
||||
//const uint8x16_p mask = {16,16,16,16, 16,16,16,16, 8,9,10,11, 12,13,14,15 };
|
||||
//return (T)vec_perm(zero, val, mask);
|
||||
return VectorShiftRight<8>(VectorShiftLeft<8>(val));
|
||||
return VectorShiftRight<8>(VectorShiftLeftOctet<8>(val));
|
||||
}
|
||||
|
||||
/// \brief Extract a dword from a vector
|
||||
|
152
sha_simd.cpp
152
sha_simd.cpp
@ -1189,7 +1189,7 @@ uint32x4_p8 VectorPack(const uint32x4_p8 a, const uint32x4_p8 b,
|
||||
}
|
||||
|
||||
template <unsigned int L> static inline
|
||||
uint32x4_p8 VectorShiftLeft(const uint32x4_p8 val)
|
||||
uint32x4_p8 VectorShiftLeftOctet(const uint32x4_p8 val)
|
||||
{
|
||||
#if (CRYPTOPP_LITTLE_ENDIAN)
|
||||
return (uint32x4_p8)vec_sld((uint8x16_p8)val, (uint8x16_p8)val, (16-L)&0xf);
|
||||
@ -1199,10 +1199,10 @@ uint32x4_p8 VectorShiftLeft(const uint32x4_p8 val)
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32x4_p8 VectorShiftLeft<0>(const uint32x4_p8 val) { return val; }
|
||||
uint32x4_p8 VectorShiftLeftOctet<0>(const uint32x4_p8 val) { return val; }
|
||||
|
||||
template <>
|
||||
uint32x4_p8 VectorShiftLeft<16>(const uint32x4_p8 val) { return val; }
|
||||
uint32x4_p8 VectorShiftLeftOctet<16>(const uint32x4_p8 val) { return val; }
|
||||
|
||||
template <unsigned int R> static inline
|
||||
void SHA256_ROUND1(uint32x4_p8 W[16], uint32x4_p8 S[8], const uint32x4_p8 K, const uint32x4_p8 M)
|
||||
@ -1257,12 +1257,12 @@ void SHA256_HashMultipleBlocks_POWER8(word32 *state, const word32 *data, size_t
|
||||
unsigned int offset=0;
|
||||
|
||||
S[A] = abcd; S[E] = efgh;
|
||||
S[B] = VectorShiftLeft<4>(S[A]);
|
||||
S[F] = VectorShiftLeft<4>(S[E]);
|
||||
S[C] = VectorShiftLeft<4>(S[B]);
|
||||
S[G] = VectorShiftLeft<4>(S[F]);
|
||||
S[D] = VectorShiftLeft<4>(S[C]);
|
||||
S[H] = VectorShiftLeft<4>(S[G]);
|
||||
S[B] = VectorShiftLeftOctet<4>(S[A]);
|
||||
S[F] = VectorShiftLeftOctet<4>(S[E]);
|
||||
S[C] = VectorShiftLeftOctet<4>(S[B]);
|
||||
S[G] = VectorShiftLeftOctet<4>(S[F]);
|
||||
S[D] = VectorShiftLeftOctet<4>(S[C]);
|
||||
S[H] = VectorShiftLeftOctet<4>(S[G]);
|
||||
|
||||
// Rounds 0-15
|
||||
vk = VectorLoad32x4u(k, offset);
|
||||
@ -1270,16 +1270,16 @@ void SHA256_HashMultipleBlocks_POWER8(word32 *state, const word32 *data, size_t
|
||||
SHA256_ROUND1<0>(W,S, vk,vm);
|
||||
offset+=16;
|
||||
|
||||
vk = VectorShiftLeft<4>(vk);
|
||||
vm = VectorShiftLeft<4>(vm);
|
||||
vk = VectorShiftLeftOctet<4>(vk);
|
||||
vm = VectorShiftLeftOctet<4>(vm);
|
||||
SHA256_ROUND1<1>(W,S, vk,vm);
|
||||
|
||||
vk = VectorShiftLeft<4>(vk);
|
||||
vm = VectorShiftLeft<4>(vm);
|
||||
vk = VectorShiftLeftOctet<4>(vk);
|
||||
vm = VectorShiftLeftOctet<4>(vm);
|
||||
SHA256_ROUND1<2>(W,S, vk,vm);
|
||||
|
||||
vk = VectorShiftLeft<4>(vk);
|
||||
vm = VectorShiftLeft<4>(vm);
|
||||
vk = VectorShiftLeftOctet<4>(vk);
|
||||
vm = VectorShiftLeftOctet<4>(vm);
|
||||
SHA256_ROUND1<3>(W,S, vk,vm);
|
||||
|
||||
vk = VectorLoad32x4u(k, offset);
|
||||
@ -1287,16 +1287,16 @@ void SHA256_HashMultipleBlocks_POWER8(word32 *state, const word32 *data, size_t
|
||||
SHA256_ROUND1<4>(W,S, vk,vm);
|
||||
offset+=16;
|
||||
|
||||
vk = VectorShiftLeft<4>(vk);
|
||||
vm = VectorShiftLeft<4>(vm);
|
||||
vk = VectorShiftLeftOctet<4>(vk);
|
||||
vm = VectorShiftLeftOctet<4>(vm);
|
||||
SHA256_ROUND1<5>(W,S, vk,vm);
|
||||
|
||||
vk = VectorShiftLeft<4>(vk);
|
||||
vm = VectorShiftLeft<4>(vm);
|
||||
vk = VectorShiftLeftOctet<4>(vk);
|
||||
vm = VectorShiftLeftOctet<4>(vm);
|
||||
SHA256_ROUND1<6>(W,S, vk,vm);
|
||||
|
||||
vk = VectorShiftLeft<4>(vk);
|
||||
vm = VectorShiftLeft<4>(vm);
|
||||
vk = VectorShiftLeftOctet<4>(vk);
|
||||
vm = VectorShiftLeftOctet<4>(vm);
|
||||
SHA256_ROUND1<7>(W,S, vk,vm);
|
||||
|
||||
vk = VectorLoad32x4u(k, offset);
|
||||
@ -1304,16 +1304,16 @@ void SHA256_HashMultipleBlocks_POWER8(word32 *state, const word32 *data, size_t
|
||||
SHA256_ROUND1<8>(W,S, vk,vm);
|
||||
offset+=16;
|
||||
|
||||
vk = VectorShiftLeft<4>(vk);
|
||||
vm = VectorShiftLeft<4>(vm);
|
||||
vk = VectorShiftLeftOctet<4>(vk);
|
||||
vm = VectorShiftLeftOctet<4>(vm);
|
||||
SHA256_ROUND1<9>(W,S, vk,vm);
|
||||
|
||||
vk = VectorShiftLeft<4>(vk);
|
||||
vm = VectorShiftLeft<4>(vm);
|
||||
vk = VectorShiftLeftOctet<4>(vk);
|
||||
vm = VectorShiftLeftOctet<4>(vm);
|
||||
SHA256_ROUND1<10>(W,S, vk,vm);
|
||||
|
||||
vk = VectorShiftLeft<4>(vk);
|
||||
vm = VectorShiftLeft<4>(vm);
|
||||
vk = VectorShiftLeftOctet<4>(vk);
|
||||
vm = VectorShiftLeftOctet<4>(vm);
|
||||
SHA256_ROUND1<11>(W,S, vk,vm);
|
||||
|
||||
vk = VectorLoad32x4u(k, offset);
|
||||
@ -1321,16 +1321,16 @@ void SHA256_HashMultipleBlocks_POWER8(word32 *state, const word32 *data, size_t
|
||||
SHA256_ROUND1<12>(W,S, vk,vm);
|
||||
offset+=16;
|
||||
|
||||
vk = VectorShiftLeft<4>(vk);
|
||||
vm = VectorShiftLeft<4>(vm);
|
||||
vk = VectorShiftLeftOctet<4>(vk);
|
||||
vm = VectorShiftLeftOctet<4>(vm);
|
||||
SHA256_ROUND1<13>(W,S, vk,vm);
|
||||
|
||||
vk = VectorShiftLeft<4>(vk);
|
||||
vm = VectorShiftLeft<4>(vm);
|
||||
vk = VectorShiftLeftOctet<4>(vk);
|
||||
vm = VectorShiftLeftOctet<4>(vm);
|
||||
SHA256_ROUND1<14>(W,S, vk,vm);
|
||||
|
||||
vk = VectorShiftLeft<4>(vk);
|
||||
vm = VectorShiftLeft<4>(vm);
|
||||
vk = VectorShiftLeftOctet<4>(vk);
|
||||
vm = VectorShiftLeftOctet<4>(vm);
|
||||
SHA256_ROUND1<15>(W,S, vk,vm);
|
||||
|
||||
m += 16; // 32-bit words, not bytes
|
||||
@ -1340,30 +1340,30 @@ void SHA256_HashMultipleBlocks_POWER8(word32 *state, const word32 *data, size_t
|
||||
{
|
||||
vk = VectorLoad32x4u(k, offset);
|
||||
SHA256_ROUND2<0>(W,S, vk);
|
||||
SHA256_ROUND2<1>(W,S, VectorShiftLeft<4>(vk));
|
||||
SHA256_ROUND2<2>(W,S, VectorShiftLeft<8>(vk));
|
||||
SHA256_ROUND2<3>(W,S, VectorShiftLeft<12>(vk));
|
||||
SHA256_ROUND2<1>(W,S, VectorShiftLeftOctet<4>(vk));
|
||||
SHA256_ROUND2<2>(W,S, VectorShiftLeftOctet<8>(vk));
|
||||
SHA256_ROUND2<3>(W,S, VectorShiftLeftOctet<12>(vk));
|
||||
offset+=16;
|
||||
|
||||
vk = VectorLoad32x4u(k, offset);
|
||||
SHA256_ROUND2<4>(W,S, vk);
|
||||
SHA256_ROUND2<5>(W,S, VectorShiftLeft<4>(vk));
|
||||
SHA256_ROUND2<6>(W,S, VectorShiftLeft<8>(vk));
|
||||
SHA256_ROUND2<7>(W,S, VectorShiftLeft<12>(vk));
|
||||
SHA256_ROUND2<5>(W,S, VectorShiftLeftOctet<4>(vk));
|
||||
SHA256_ROUND2<6>(W,S, VectorShiftLeftOctet<8>(vk));
|
||||
SHA256_ROUND2<7>(W,S, VectorShiftLeftOctet<12>(vk));
|
||||
offset+=16;
|
||||
|
||||
vk = VectorLoad32x4u(k, offset);
|
||||
SHA256_ROUND2<8>(W,S, vk);
|
||||
SHA256_ROUND2<9>(W,S, VectorShiftLeft<4>(vk));
|
||||
SHA256_ROUND2<10>(W,S, VectorShiftLeft<8>(vk));
|
||||
SHA256_ROUND2<11>(W,S, VectorShiftLeft<12>(vk));
|
||||
SHA256_ROUND2<9>(W,S, VectorShiftLeftOctet<4>(vk));
|
||||
SHA256_ROUND2<10>(W,S, VectorShiftLeftOctet<8>(vk));
|
||||
SHA256_ROUND2<11>(W,S, VectorShiftLeftOctet<12>(vk));
|
||||
offset+=16;
|
||||
|
||||
vk = VectorLoad32x4u(k, offset);
|
||||
SHA256_ROUND2<12>(W,S, vk);
|
||||
SHA256_ROUND2<13>(W,S, VectorShiftLeft<4>(vk));
|
||||
SHA256_ROUND2<14>(W,S, VectorShiftLeft<8>(vk));
|
||||
SHA256_ROUND2<15>(W,S, VectorShiftLeft<12>(vk));
|
||||
SHA256_ROUND2<13>(W,S, VectorShiftLeftOctet<4>(vk));
|
||||
SHA256_ROUND2<14>(W,S, VectorShiftLeftOctet<8>(vk));
|
||||
SHA256_ROUND2<15>(W,S, VectorShiftLeftOctet<12>(vk));
|
||||
offset+=16;
|
||||
}
|
||||
|
||||
@ -1478,7 +1478,7 @@ uint64x2_p8 VectorPack(const uint64x2_p8 x, const uint64x2_p8 y)
|
||||
}
|
||||
|
||||
template <unsigned int L> static inline
|
||||
uint64x2_p8 VectorShiftLeft(const uint64x2_p8 val)
|
||||
uint64x2_p8 VectorShiftLeftOctet(const uint64x2_p8 val)
|
||||
{
|
||||
#if (CRYPTOPP_LITTLE_ENDIAN)
|
||||
return (uint64x2_p8)vec_sld((uint8x16_p8)val, (uint8x16_p8)val, (16-L)&0xf);
|
||||
@ -1488,10 +1488,10 @@ uint64x2_p8 VectorShiftLeft(const uint64x2_p8 val)
|
||||
}
|
||||
|
||||
template <>
|
||||
uint64x2_p8 VectorShiftLeft<0>(const uint64x2_p8 val) { return val; }
|
||||
uint64x2_p8 VectorShiftLeftOctet<0>(const uint64x2_p8 val) { return val; }
|
||||
|
||||
template <>
|
||||
uint64x2_p8 VectorShiftLeft<16>(const uint64x2_p8 val) { return val; }
|
||||
uint64x2_p8 VectorShiftLeftOctet<16>(const uint64x2_p8 val) { return val; }
|
||||
|
||||
template <unsigned int R> static inline
|
||||
void SHA512_ROUND1(uint64x2_p8 W[16], uint64x2_p8 S[8], const uint64x2_p8 K, const uint64x2_p8 M)
|
||||
@ -1549,10 +1549,10 @@ void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t
|
||||
|
||||
S[A] = ab; S[C] = cd;
|
||||
S[E] = ef; S[G] = gh;
|
||||
S[B] = VectorShiftLeft<8>(S[A]);
|
||||
S[D] = VectorShiftLeft<8>(S[C]);
|
||||
S[F] = VectorShiftLeft<8>(S[E]);
|
||||
S[H] = VectorShiftLeft<8>(S[G]);
|
||||
S[B] = VectorShiftLeftOctet<8>(S[A]);
|
||||
S[D] = VectorShiftLeftOctet<8>(S[C]);
|
||||
S[F] = VectorShiftLeftOctet<8>(S[E]);
|
||||
S[H] = VectorShiftLeftOctet<8>(S[G]);
|
||||
|
||||
// Rounds 0-15
|
||||
vk = VectorLoad64x2u(k, offset);
|
||||
@ -1560,8 +1560,8 @@ void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t
|
||||
SHA512_ROUND1<0>(W,S, vk,vm);
|
||||
offset+=16;
|
||||
|
||||
vk = VectorShiftLeft<8>(vk);
|
||||
vm = VectorShiftLeft<8>(vm);
|
||||
vk = VectorShiftLeftOctet<8>(vk);
|
||||
vm = VectorShiftLeftOctet<8>(vm);
|
||||
SHA512_ROUND1<1>(W,S, vk,vm);
|
||||
|
||||
vk = VectorLoad64x2u(k, offset);
|
||||
@ -1569,8 +1569,8 @@ void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t
|
||||
SHA512_ROUND1<2>(W,S, vk,vm);
|
||||
offset+=16;
|
||||
|
||||
vk = VectorShiftLeft<8>(vk);
|
||||
vm = VectorShiftLeft<8>(vm);
|
||||
vk = VectorShiftLeftOctet<8>(vk);
|
||||
vm = VectorShiftLeftOctet<8>(vm);
|
||||
SHA512_ROUND1<3>(W,S, vk,vm);
|
||||
|
||||
vk = VectorLoad64x2u(k, offset);
|
||||
@ -1578,8 +1578,8 @@ void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t
|
||||
SHA512_ROUND1<4>(W,S, vk,vm);
|
||||
offset+=16;
|
||||
|
||||
vk = VectorShiftLeft<8>(vk);
|
||||
vm = VectorShiftLeft<8>(vm);
|
||||
vk = VectorShiftLeftOctet<8>(vk);
|
||||
vm = VectorShiftLeftOctet<8>(vm);
|
||||
SHA512_ROUND1<5>(W,S, vk,vm);
|
||||
|
||||
vk = VectorLoad64x2u(k, offset);
|
||||
@ -1587,8 +1587,8 @@ void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t
|
||||
SHA512_ROUND1<6>(W,S, vk,vm);
|
||||
offset+=16;
|
||||
|
||||
vk = VectorShiftLeft<8>(vk);
|
||||
vm = VectorShiftLeft<8>(vm);
|
||||
vk = VectorShiftLeftOctet<8>(vk);
|
||||
vm = VectorShiftLeftOctet<8>(vm);
|
||||
SHA512_ROUND1<7>(W,S, vk,vm);
|
||||
|
||||
vk = VectorLoad64x2u(k, offset);
|
||||
@ -1596,8 +1596,8 @@ void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t
|
||||
SHA512_ROUND1<8>(W,S, vk,vm);
|
||||
offset+=16;
|
||||
|
||||
vk = VectorShiftLeft<8>(vk);
|
||||
vm = VectorShiftLeft<8>(vm);
|
||||
vk = VectorShiftLeftOctet<8>(vk);
|
||||
vm = VectorShiftLeftOctet<8>(vm);
|
||||
SHA512_ROUND1<9>(W,S, vk,vm);
|
||||
|
||||
vk = VectorLoad64x2u(k, offset);
|
||||
@ -1605,8 +1605,8 @@ void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t
|
||||
SHA512_ROUND1<10>(W,S, vk,vm);
|
||||
offset+=16;
|
||||
|
||||
vk = VectorShiftLeft<8>(vk);
|
||||
vm = VectorShiftLeft<8>(vm);
|
||||
vk = VectorShiftLeftOctet<8>(vk);
|
||||
vm = VectorShiftLeftOctet<8>(vm);
|
||||
SHA512_ROUND1<11>(W,S, vk,vm);
|
||||
|
||||
vk = VectorLoad64x2u(k, offset);
|
||||
@ -1614,8 +1614,8 @@ void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t
|
||||
SHA512_ROUND1<12>(W,S, vk,vm);
|
||||
offset+=16;
|
||||
|
||||
vk = VectorShiftLeft<8>(vk);
|
||||
vm = VectorShiftLeft<8>(vm);
|
||||
vk = VectorShiftLeftOctet<8>(vk);
|
||||
vm = VectorShiftLeftOctet<8>(vm);
|
||||
SHA512_ROUND1<13>(W,S, vk,vm);
|
||||
|
||||
vk = VectorLoad64x2u(k, offset);
|
||||
@ -1623,8 +1623,8 @@ void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t
|
||||
SHA512_ROUND1<14>(W,S, vk,vm);
|
||||
offset+=16;
|
||||
|
||||
vk = VectorShiftLeft<8>(vk);
|
||||
vm = VectorShiftLeft<8>(vm);
|
||||
vk = VectorShiftLeftOctet<8>(vk);
|
||||
vm = VectorShiftLeftOctet<8>(vm);
|
||||
SHA512_ROUND1<15>(W,S, vk,vm);
|
||||
|
||||
m += 16; // 64-bit words, not bytes
|
||||
@ -1634,42 +1634,42 @@ void SHA512_HashMultipleBlocks_POWER8(word64 *state, const word64 *data, size_t
|
||||
{
|
||||
vk = VectorLoad64x2u(k, offset);
|
||||
SHA512_ROUND2<0>(W,S, vk);
|
||||
SHA512_ROUND2<1>(W,S, VectorShiftLeft<8>(vk));
|
||||
SHA512_ROUND2<1>(W,S, VectorShiftLeftOctet<8>(vk));
|
||||
offset+=16;
|
||||
|
||||
vk = VectorLoad64x2u(k, offset);
|
||||
SHA512_ROUND2<2>(W,S, vk);
|
||||
SHA512_ROUND2<3>(W,S, VectorShiftLeft<8>(vk));
|
||||
SHA512_ROUND2<3>(W,S, VectorShiftLeftOctet<8>(vk));
|
||||
offset+=16;
|
||||
|
||||
vk = VectorLoad64x2u(k, offset);
|
||||
SHA512_ROUND2<4>(W,S, vk);
|
||||
SHA512_ROUND2<5>(W,S, VectorShiftLeft<8>(vk));
|
||||
SHA512_ROUND2<5>(W,S, VectorShiftLeftOctet<8>(vk));
|
||||
offset+=16;
|
||||
|
||||
vk = VectorLoad64x2u(k, offset);
|
||||
SHA512_ROUND2<6>(W,S, vk);
|
||||
SHA512_ROUND2<7>(W,S, VectorShiftLeft<8>(vk));
|
||||
SHA512_ROUND2<7>(W,S, VectorShiftLeftOctet<8>(vk));
|
||||
offset+=16;
|
||||
|
||||
vk = VectorLoad64x2u(k, offset);
|
||||
SHA512_ROUND2<8>(W,S, vk);
|
||||
SHA512_ROUND2<9>(W,S, VectorShiftLeft<8>(vk));
|
||||
SHA512_ROUND2<9>(W,S, VectorShiftLeftOctet<8>(vk));
|
||||
offset+=16;
|
||||
|
||||
vk = VectorLoad64x2u(k, offset);
|
||||
SHA512_ROUND2<10>(W,S, vk);
|
||||
SHA512_ROUND2<11>(W,S, VectorShiftLeft<8>(vk));
|
||||
SHA512_ROUND2<11>(W,S, VectorShiftLeftOctet<8>(vk));
|
||||
offset+=16;
|
||||
|
||||
vk = VectorLoad64x2u(k, offset);
|
||||
SHA512_ROUND2<12>(W,S, vk);
|
||||
SHA512_ROUND2<13>(W,S, VectorShiftLeft<8>(vk));
|
||||
SHA512_ROUND2<13>(W,S, VectorShiftLeftOctet<8>(vk));
|
||||
offset+=16;
|
||||
|
||||
vk = VectorLoad64x2u(k, offset);
|
||||
SHA512_ROUND2<14>(W,S, vk);
|
||||
SHA512_ROUND2<15>(W,S, VectorShiftLeft<8>(vk));
|
||||
SHA512_ROUND2<15>(W,S, VectorShiftLeftOctet<8>(vk));
|
||||
offset+=16;
|
||||
}
|
||||
|
||||
|
10
validat1.cpp
10
validat1.cpp
@ -1143,7 +1143,7 @@ bool TestAltivecOps()
|
||||
uint8x16_p val = {0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff,
|
||||
0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff};
|
||||
|
||||
pass2 = (VectorEqual(val, VectorShiftLeft<0>(val))) && pass2;
|
||||
pass2 = (VectorEqual(val, VectorShiftLeftOctet<0>(val))) && pass2;
|
||||
CRYPTOPP_ASSERT(pass2);
|
||||
pass2 = (VectorEqual(val, VectorShiftRight<0>(val))) && pass2;
|
||||
CRYPTOPP_ASSERT(pass2);
|
||||
@ -1153,7 +1153,7 @@ bool TestAltivecOps()
|
||||
uint8x16_p rsh1 = {0x00,0xff,0xff,0xff, 0xff,0xff,0xff,0xff,
|
||||
0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff};
|
||||
|
||||
pass2 = (VectorEqual(lsh1, VectorShiftLeft<1>(val))) && pass2;
|
||||
pass2 = (VectorEqual(lsh1, VectorShiftLeftOctet<1>(val))) && pass2;
|
||||
CRYPTOPP_ASSERT(pass2);
|
||||
pass2 = (VectorEqual(rsh1, VectorShiftRight<1>(val))) && pass2;
|
||||
CRYPTOPP_ASSERT(pass2);
|
||||
@ -1163,7 +1163,7 @@ bool TestAltivecOps()
|
||||
uint8x16_p rsh15 = {0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0xff};
|
||||
|
||||
pass2 = (VectorEqual(lsh15, VectorShiftLeft<15>(val))) && pass2;
|
||||
pass2 = (VectorEqual(lsh15, VectorShiftLeftOctet<15>(val))) && pass2;
|
||||
CRYPTOPP_ASSERT(pass2);
|
||||
pass2 = (VectorEqual(rsh15, VectorShiftRight<15>(val))) && pass2;
|
||||
CRYPTOPP_ASSERT(pass2);
|
||||
@ -1173,7 +1173,7 @@ bool TestAltivecOps()
|
||||
uint8x16_p rsh16 = {0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00};
|
||||
|
||||
pass2 = (VectorEqual(lsh16, VectorShiftLeft<16>(val))) && pass2;
|
||||
pass2 = (VectorEqual(lsh16, VectorShiftLeftOctet<16>(val))) && pass2;
|
||||
CRYPTOPP_ASSERT(pass2);
|
||||
pass2 = (VectorEqual(rsh16, VectorShiftRight<16>(val))) && pass2;
|
||||
CRYPTOPP_ASSERT(pass2);
|
||||
@ -1199,7 +1199,7 @@ bool TestAltivecOps()
|
||||
pass3 = VectorEqual(ex3, VectorGetHigh(ex1)) && pass3;
|
||||
CRYPTOPP_ASSERT(pass3);
|
||||
|
||||
uint8x16_p ex4 = VectorShiftRight<8>(VectorShiftLeft<8>(ex1));
|
||||
uint8x16_p ex4 = VectorShiftRight<8>(VectorShiftLeftOctet<8>(ex1));
|
||||
pass3 = VectorEqual(ex4, VectorGetLow(ex1)) && pass3;
|
||||
CRYPTOPP_ASSERT(pass3);
|
||||
uint8x16_p ex5 = VectorShiftRight<8>(ex1);
|
||||
|
Loading…
Reference in New Issue
Block a user