Fix Altivec VectorStore on little-endian

Remove unneeded VectorShiftLeft(a,b) and VectorShiftRight(a,b)
This commit is contained in:
Jeffrey Walton 2018-08-08 19:59:14 -04:00
parent 96405e14ec
commit 00e7d02a8a
No known key found for this signature in database
GPG Key ID: B36AB348921B1838

View File

@ -71,7 +71,7 @@ template <class T>
inline T Reverse(const T& src) inline T Reverse(const T& src)
{ {
const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0}; const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
return vec_perm(src, src, mask); return (T)vec_perm(src, src, mask);
} }
/// \brief Permutes two vectors /// \brief Permutes two vectors
@ -147,40 +147,6 @@ inline T1 VectorAdd(const T1& vec1, const T2& vec2)
return (T1)vec_add(vec1, (T1)vec2); return (T1)vec_add(vec1, (T1)vec2);
} }
/// \brief Shift two vectors left
/// \tparam C shift byte count
/// \tparam T1 vector type
/// \tparam T2 vector type
/// \param vec1 the first vector
/// \param vec2 the second vector
/// \details VectorShiftLeft() concatenates vec1 and vec2 and returns a
/// new vector after shifting the concatenation by the specified number
/// of bytes. Both vec1 and vec2 are cast to uint8x16_p. The return
/// vector is the same type as vec1.
/// \details On big endian machines VectorShiftLeft() is <tt>vec_sld(a, b,
/// c)</tt>. On little endian machines VectorShiftLeft() is translated to
/// <tt>vec_sld(b, a, 16-c)</tt>. You should always call the function as
/// if on a big endian machine as shown below.
/// <pre>
/// uint8x16_p r0 = {0};
/// uint8x16_p r1 = VectorLoad(ptr);
/// uint8x16_p r5 = VectorShiftLeft<12>(r0, r1);
/// </pre>
/// \sa <A HREF="https://stackoverflow.com/q/46341923/608639">Is vec_sld
/// endian sensitive?</A> on Stack Overflow
/// \since Crypto++ 6.0
template <unsigned int C, class T1, class T2>
inline T1 VectorShiftLeft(const T1& vec1, const T2& vec2)
{
#if CRYPTOPP_BIG_ENDIAN
enum { R=(C)&0xf };
return (T1)vec_sld((uint8x16_p)vec1, (uint8x16_p)vec2, R);
#else
enum { R=(16-C)&0xf };
return (T1)vec_sld((uint8x16_p)vec2, (uint8x16_p)vec1, R);
#endif
}
/// \brief Shift a vector left /// \brief Shift a vector left
/// \tparam C shift byte count /// \tparam C shift byte count
/// \tparam T vector type /// \tparam T vector type
@ -203,16 +169,40 @@ template <unsigned int C, class T>
inline T VectorShiftLeft(const T& vec) inline T VectorShiftLeft(const T& vec)
{ {
#if CRYPTOPP_BIG_ENDIAN #if CRYPTOPP_BIG_ENDIAN
enum { R=(C)&0xf }; enum { R=(C)&0xf, S=R };
const T zero = VectorXor(vec, vec); const T zero = {0};
return (T)vec_sld((uint8x16_p)vec, (uint8x16_p)zero, R); return (T)vec_sld((uint8x16_p)vec, (uint8x16_p)zero, S);
#else #else
enum { R=(16-C)&0xf }; enum { R=(16-C)&0xf, S=R };
const T zero = VectorXor(vec, vec); const T zero = {0};
return (T)vec_sld((uint8x16_p)zero, (uint8x16_p)vec, R); return (T)vec_sld((uint8x16_p)zero, (uint8x16_p)vec, S);
#endif #endif
} }
// Full specializations for 0 over uint8x16_p to uint64x2_p
template<>
inline uint8x16_p VectorShiftLeft<0, uint8x16_p>(const uint8x16_p& vec)
{
return vec;
}
template<>
inline uint16x8_p VectorShiftLeft<0, uint16x8_p>(const uint16x8_p& vec)
{
return vec;
}
template<>
inline uint32x4_p VectorShiftLeft<0, uint32x4_p>(const uint32x4_p& vec)
{
return vec;
}
#if defined(CRYPTOPP_POWER8_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
template<>
inline uint64x2_p VectorShiftLeft<0, uint64x2_p>(const uint64x2_p& vec)
{
return vec;
}
#endif
/// \brief Shift a vector right /// \brief Shift a vector right
/// \tparam C shift byte count /// \tparam C shift byte count
/// \tparam T vector type /// \tparam T vector type
@ -235,48 +225,86 @@ template <unsigned int C, class T>
inline T VectorShiftRight(const T& vec) inline T VectorShiftRight(const T& vec)
{ {
#if CRYPTOPP_BIG_ENDIAN #if CRYPTOPP_BIG_ENDIAN
enum { R=(C)&0xf }; enum { R=(16-C)&0xf, S=R };
const T zero = VectorXor(vec, vec); const T zero = {0};
return (T)vec_sld((uint8x16_p)vec, (uint8x16_p)zero, R); return (T)vec_sld((uint8x16_p)zero, (uint8x16_p)vec, S);
#else #else
enum { R=(16-C)&0xf }; enum { R=(C)&0xf, S=R };
const T zero = VectorXor(vec, vec); const T zero = {0};
return (T)vec_sld((uint8x16_p)zero, (uint8x16_p)vec, R); return (T)vec_sld((uint8x16_p)vec, (uint8x16_p)zero, S);
#endif #endif
} }
/// \brief Shift two vectors right // Full specializations for 0 over uint8x16_p to uint64x2_p
/// \tparam C shift byte count template<>
inline uint8x16_p VectorShiftRight<0, uint8x16_p>(const uint8x16_p& vec)
{
return vec;
}
template<>
inline uint16x8_p VectorShiftRight<0, uint16x8_p>(const uint16x8_p& vec)
{
return vec;
}
template<>
inline uint32x4_p VectorShiftRight<0, uint32x4_p>(const uint32x4_p& vec)
{
return vec;
}
#if defined(CRYPTOPP_POWER8_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
template<>
inline uint64x2_p VectorShiftRight<0, uint64x2_p>(const uint64x2_p& vec)
{
return vec;
}
#endif
// Full specializations for 16 over uint8x16_p to uint64x2_p
template<>
inline uint8x16_p VectorShiftRight<16, uint8x16_p>(const uint8x16_p& vec)
{
return vec;
}
template<>
inline uint16x8_p VectorShiftRight<16, uint16x8_p>(const uint16x8_p& vec)
{
return vec;
}
template<>
inline uint32x4_p VectorShiftRight<16, uint32x4_p>(const uint32x4_p& vec)
{
return vec;
}
#if defined(CRYPTOPP_POWER8_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
template<>
inline uint64x2_p VectorShiftRight<16, uint64x2_p>(const uint64x2_p& vec)
{
return vec;
}
#endif
/// \brief Compare two vectors
/// \tparam T1 vector type /// \tparam T1 vector type
/// \tparam T2 vector type /// \tparam T2 vector type
/// \param vec1 the first vector /// \param vec1 the first vector
/// \param vec2 the second vector /// \param vec2 the second vector
/// \details VectorShiftRight() concatenates vec1 and vec2 and returns a /// \returns true if vec1 equals vec2, false otherwise
/// new vector after shifting the concatenation by the specified number template <class T1, class T2>
/// of bytes. Both vec1 and vec2 are cast to uint8x16_p. The return inline bool VectorEqual(const T1& vec1, const T2& vec2)
/// vector is the same type as vec1.
/// \details On big endian machines VectorShiftRight() is <tt>vec_sld(b, a,
/// 16-c)</tt>. On little endian machines VectorShiftRight() is translated to
/// <tt>vec_sld(a, b, c)</tt>. You should always call the function as
/// if on a big endian machine as shown below.
/// <pre>
/// uint8x16_p r0 = {0};
/// uint8x16_p r1 = VectorLoad(ptr);
/// uint8x16_p r5 = VectorShiftRight<12>(r0, r1);
/// </pre>
/// \sa <A HREF="https://stackoverflow.com/q/46341923/608639">Is vec_sld
/// endian sensitive?</A> on Stack Overflow
/// \since Crypto++ 6.0
template <unsigned int C, class T1, class T2>
inline T1 VectorShiftRight(const T1& vec1, const T2& vec2)
{ {
#if CRYPTOPP_BIG_ENDIAN return 1 == vec_all_eq((uint32x4_p)vec1, (uint32x4_p)vec2);
enum { R=(C)&0xf }; }
return (T1)vec_sld((uint8x16_p)vec1, (uint8x16_p)vec2, R);
#else /// \brief Compare two vectors
enum { R=(16-C)&0xf }; /// \tparam T1 vector type
return (T1)vec_sld((uint8x16_p)vec2, (uint8x16_p)vec1, R); /// \tparam T2 vector type
#endif /// \param vec1 the first vector
/// \param vec2 the second vector
/// \returns true if vec1 does not equal vec2, false otherwise
template <class T1, class T2>
inline bool VectorNotEqual(const T1& vec1, const T2& vec2)
{
return 0 == vec_all_eq((uint32x4_p)vec1, (uint32x4_p)vec2);
} }
#endif // POWER4 and above #endif // POWER4 and above
@ -296,10 +324,10 @@ inline uint32x4_p VectorLoadBE(const uint8_t src[16])
#if defined(CRYPTOPP_XLC_VERSION) #if defined(CRYPTOPP_XLC_VERSION)
return (uint32x4_p)vec_xl_be(0, (byte*)src); return (uint32x4_p)vec_xl_be(0, (byte*)src);
#else #else
# if defined(CRYPTOPP_LITTLE_ENDIAN) # if defined(CRYPTOPP_BIG_ENDIAN)
return (uint32x4_p)Reverse(vec_vsx_ld(0, src));
# else
return (uint32x4_p)vec_vsx_ld(0, src); return (uint32x4_p)vec_vsx_ld(0, src);
# else
return (uint32x4_p)Reverse(vec_vsx_ld(0, src));
# endif # endif
#endif #endif
} }
@ -317,10 +345,10 @@ inline uint32x4_p VectorLoadBE(int off, const uint8_t src[16])
#if defined(CRYPTOPP_XLC_VERSION) #if defined(CRYPTOPP_XLC_VERSION)
return (uint32x4_p)vec_xl_be(off, (byte*)src); return (uint32x4_p)vec_xl_be(off, (byte*)src);
#else #else
# if defined(CRYPTOPP_LITTLE_ENDIAN) # if defined(CRYPTOPP_BIG_ENDIAN)
return (uint32x4_p)Reverse(vec_vsx_ld(off, src));
# else
return (uint32x4_p)vec_vsx_ld(off, src); return (uint32x4_p)vec_vsx_ld(off, src);
# else
return (uint32x4_p)Reverse(vec_vsx_ld(off, src));
# endif # endif
#endif #endif
} }
@ -371,10 +399,10 @@ inline void VectorStoreBE(const T& src, uint8_t dest[16])
#if defined(CRYPTOPP_XLC_VERSION) #if defined(CRYPTOPP_XLC_VERSION)
vec_xst_be((uint8x16_p)src, 0, dest); vec_xst_be((uint8x16_p)src, 0, dest);
#else #else
# if defined(CRYPTOPP_LITTLE_ENDIAN) # if defined(CRYPTOPP_BIG_ENDIAN)
vec_vsx_st(Reverse((uint8x16_p)src), 0, dest);
# else
vec_vsx_st((uint8x16_p)src, 0, dest); vec_vsx_st((uint8x16_p)src, 0, dest);
# else
vec_vsx_st((uint8x16_p)Reverse(src), 0, dest);
# endif # endif
#endif #endif
} }
@ -395,10 +423,10 @@ inline void VectorStoreBE(const T& src, int off, uint8_t dest[16])
#if defined(CRYPTOPP_XLC_VERSION) #if defined(CRYPTOPP_XLC_VERSION)
vec_xst_be((uint8x16_p)src, off, dest); vec_xst_be((uint8x16_p)src, off, dest);
#else #else
# if defined(CRYPTOPP_LITTLE_ENDIAN) # if defined(CRYPTOPP_BIG_ENDIAN)
vec_vsx_st(Reverse((uint8x16_p)src), off, dest);
# else
vec_vsx_st((uint8x16_p)src, off, dest); vec_vsx_st((uint8x16_p)src, off, dest);
# else
vec_vsx_st((uint8x16_p)Reverse(src), off, dest);
# endif # endif
#endif #endif
} }
@ -472,37 +500,29 @@ inline uint32x4_p VectorLoadBE(const uint8_t src[16])
#if defined(CRYPTOPP_BIG_ENDIAN) #if defined(CRYPTOPP_BIG_ENDIAN)
return (uint32x4_p)VectorLoad(src); return (uint32x4_p)VectorLoad(src);
#else #else
const uint8x16_p data = (uint8x16_p)VectorLoad(src); return (uint32x4_p)Reverse(VectorLoad(src));
const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
return (uint32x4_p)vec_perm(data, data, mask);
#endif #endif
} }
inline void VectorStore(const uint32x4_p data, byte dest[16]) template<class T>
inline void VectorStore(const T& data, byte dest[16])
{ {
#if defined(CRYPTOPP_LITTLE_ENDIAN)
const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
const uint8x16_p t1 = (uint8x16_p)vec_perm(data, data, mask);
#else
const uint8x16_p t1 = (uint8x16_p)data;
#endif
if (IsAlignedOn(dest, 16)) if (IsAlignedOn(dest, 16))
{ {
vec_st(t1, 0, dest); vec_st((uint8x16_p)data, 0, dest);
} }
else else
{ {
// http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf // http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
const uint8x16_p t2 = vec_perm(t1, t1, vec_lvsr(0, dest)); uint8x16_p perm = (uint8x16_p)vec_perm(data, data, vec_lvsr(0, dest));
vec_ste((uint8x16_p) t2, 0, (unsigned char*) dest); vec_ste((uint8x16_p) perm, 0, (unsigned char*) dest);
vec_ste((uint16x8_p) t2, 1, (unsigned short*)dest); vec_ste((uint16x8_p) perm, 1, (unsigned short*)dest);
vec_ste((uint32x4_p) t2, 3, (unsigned int*) dest); vec_ste((uint32x4_p) perm, 3, (unsigned int*) dest);
vec_ste((uint32x4_p) t2, 4, (unsigned int*) dest); vec_ste((uint32x4_p) perm, 4, (unsigned int*) dest);
vec_ste((uint32x4_p) t2, 8, (unsigned int*) dest); vec_ste((uint32x4_p) perm, 8, (unsigned int*) dest);
vec_ste((uint32x4_p) t2, 12, (unsigned int*) dest); vec_ste((uint32x4_p) perm, 12, (unsigned int*) dest);
vec_ste((uint16x8_p) t2, 14, (unsigned short*)dest); vec_ste((uint16x8_p) perm, 14, (unsigned short*)dest);
vec_ste((uint8x16_p) t2, 15, (unsigned char*) dest); vec_ste((uint8x16_p) perm, 15, (unsigned char*) dest);
} }
} }
@ -521,8 +541,7 @@ inline void VectorStoreBE(const T& src, uint8_t dest[16])
#if defined(CRYPTOPP_BIG_ENDIAN) #if defined(CRYPTOPP_BIG_ENDIAN)
VectorStore(src, dest); VectorStore(src, dest);
#else #else
const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0}; VectorStore(Reverse(src), dest);
VectorStore(vec_perm(src, src, mask), dest);
#endif #endif
} }