diff --git a/ppc-simd.h b/ppc-simd.h
index 7f4c1913..47896153 100644
--- a/ppc-simd.h
+++ b/ppc-simd.h
@@ -71,7 +71,7 @@ template <class T>
 inline T Reverse(const T& src)
 {
     const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
-    return vec_perm(src, src, mask);
+    return (T)vec_perm(src, src, mask);
 }
 
 /// \brief Permutes two vectors
@@ -147,40 +147,6 @@ inline T1 VectorAdd(const T1& vec1, const T2& vec2)
 {
     return (T1)vec_add(vec1, (T1)vec2);
 }
-/// \brief Shift two vectors left
-/// \tparam C shift byte count
-/// \tparam T1 vector type
-/// \tparam T2 vector type
-/// \param vec1 the first vector
-/// \param vec2 the second vector
-/// \details VectorShiftLeft() concatenates vec1 and vec2 and returns a
-///   new vector after shifting the concatenation by the specified number
-///   of bytes. Both vec1 and vec2 are cast to uint8x16_p. The return
-///   vector is the same type as vec1.
-/// \details On big endian machines VectorShiftLeft() is vec_sld(a, b,
-///   c). On little endian machines VectorShiftLeft() is translated to
-///   vec_sld(b, a, 16-c). You should always call the function as
-///   if on a big endian machine as shown below.
-/// <pre>
-///    uint8x16_p r0 = {0};
-///    uint8x16_p r1 = VectorLoad(ptr);
-///    uint8x16_p r5 = VectorShiftLeft<12>(r0, r1);
-/// </pre>
-/// \sa Is vec_sld
-///   endian sensitive? on Stack Overflow
-/// \since Crypto++ 6.0
-template <unsigned int C, class T1, class T2>
-inline T1 VectorShiftLeft(const T1& vec1, const T2& vec2)
-{
-#if CRYPTOPP_BIG_ENDIAN
-    enum { R=(C)&0xf };
-    return (T1)vec_sld((uint8x16_p)vec1, (uint8x16_p)vec2, R);
-#else
-    enum { R=(16-C)&0xf };
-    return (T1)vec_sld((uint8x16_p)vec2, (uint8x16_p)vec1, R);
-#endif
-}
-
 /// \brief Shift a vector left
 /// \tparam C shift byte count
 /// \tparam T vector type
@@ -203,16 +169,40 @@ template <unsigned int C, class T>
 inline T VectorShiftLeft(const T& vec)
 {
 #if CRYPTOPP_BIG_ENDIAN
-    enum { R=(C)&0xf };
-    const T zero = VectorXor(vec, vec);
-    return (T)vec_sld((uint8x16_p)vec, (uint8x16_p)zero, R);
+    enum { R=(C)&0xf, S=R };
+    const T zero = {0};
+    return (T)vec_sld((uint8x16_p)vec, (uint8x16_p)zero, S);
 #else
-    enum { R=(16-C)&0xf };
-    const T zero = VectorXor(vec, vec);
-    return (T)vec_sld((uint8x16_p)zero, (uint8x16_p)vec, R);
+    enum { R=(16-C)&0xf, S=R };
+    const T zero = {0};
+    return (T)vec_sld((uint8x16_p)zero, (uint8x16_p)vec, S);
 #endif
 }
 
+// Full specializations for 0 over uint8x16_p to uint64x2_p
+template<>
+inline uint8x16_p VectorShiftLeft<0, uint8x16_p>(const uint8x16_p& vec)
+{
+    return vec;
+}
+template<>
+inline uint16x8_p VectorShiftLeft<0, uint16x8_p>(const uint16x8_p& vec)
+{
+    return vec;
+}
+template<>
+inline uint32x4_p VectorShiftLeft<0, uint32x4_p>(const uint32x4_p& vec)
+{
+    return vec;
+}
+#if defined(CRYPTOPP_POWER8_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
+template<>
+inline uint64x2_p VectorShiftLeft<0, uint64x2_p>(const uint64x2_p& vec)
+{
+    return vec;
+}
+#endif
+
 /// \brief Shift a vector right
 /// \tparam C shift byte count
 /// \tparam T vector type
@@ -235,48 +225,86 @@ template <unsigned int C, class T>
 inline T VectorShiftRight(const T& vec)
 {
 #if CRYPTOPP_BIG_ENDIAN
-    enum { R=(C)&0xf };
-    const T zero = VectorXor(vec, vec);
-    return (T)vec_sld((uint8x16_p)vec, (uint8x16_p)zero, R);
+    enum { R=(16-C)&0xf, S=R };
+    const T zero = {0};
+    return (T)vec_sld((uint8x16_p)zero, (uint8x16_p)vec, S);
 #else
-    enum { R=(16-C)&0xf };
-    const T zero = VectorXor(vec, vec);
-    return (T)vec_sld((uint8x16_p)zero, (uint8x16_p)vec, R);
+    enum { R=(C)&0xf, S=R };
+    const T zero = {0};
+    return (T)vec_sld((uint8x16_p)vec, (uint8x16_p)zero, S);
 #endif
 }
 
-/// \brief Shift two vectors right
-/// \tparam C shift byte count
+// Full specializations for 0 over uint8x16_p to uint64x2_p
+template<>
+inline uint8x16_p VectorShiftRight<0, uint8x16_p>(const uint8x16_p& vec)
+{
+    return vec;
+}
+template<>
+inline uint16x8_p VectorShiftRight<0, uint16x8_p>(const uint16x8_p& vec)
+{
+    return vec;
+}
+template<>
+inline uint32x4_p VectorShiftRight<0, uint32x4_p>(const uint32x4_p& vec)
+{
+    return vec;
+}
+#if defined(CRYPTOPP_POWER8_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
+template<>
+inline uint64x2_p VectorShiftRight<0, uint64x2_p>(const uint64x2_p& vec)
+{
+    return vec;
+}
+#endif
+
+// Full specializations for 16 over uint8x16_p to uint64x2_p
+template<>
+inline uint8x16_p VectorShiftRight<16, uint8x16_p>(const uint8x16_p& vec)
+{
+    return vec;
+}
+template<>
+inline uint16x8_p VectorShiftRight<16, uint16x8_p>(const uint16x8_p& vec)
+{
+    return vec;
+}
+template<>
+inline uint32x4_p VectorShiftRight<16, uint32x4_p>(const uint32x4_p& vec)
+{
+    return vec;
+}
+#if defined(CRYPTOPP_POWER8_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
+template<>
+inline uint64x2_p VectorShiftRight<16, uint64x2_p>(const uint64x2_p& vec)
+{
+    return vec;
+}
+#endif
+
+/// \brief Compare two vectors
 /// \tparam T1 vector type
 /// \tparam T2 vector type
 /// \param vec1 the first vector
 /// \param vec2 the second vector
-/// \details VectorShiftRight() concatenates vec1 and vec2 and returns a
-///   new vector after shifting the concatenation by the specified number
-///   of bytes. Both vec1 and vec2 are cast to uint8x16_p. The return
-///   vector is the same type as vec1.
-/// \details On big endian machines VectorShiftRight() is vec_sld(b, a,
-///   16-c). On little endian machines VectorShiftRight() is translated to
-///   vec_sld(a, b, c). You should always call the function as
-///   if on a big endian machine as shown below.
-/// <pre>
-///    uint8x16_p r0 = {0};
-///    uint8x16_p r1 = VectorLoad(ptr);
-///    uint8x16_p r5 = VectorShiftRight<12>(r0, r1);
-/// </pre>
-/// \sa Is vec_sld
-///   endian sensitive? on Stack Overflow
-/// \since Crypto++ 6.0
-template <unsigned int C, class T1, class T2>
-inline T1 VectorShiftRight(const T1& vec1, const T2& vec2)
+/// \returns true if vec1 equals vec2, false otherwise
+template <class T1, class T2>
+inline bool VectorEqual(const T1& vec1, const T2& vec2)
 {
-#if CRYPTOPP_BIG_ENDIAN
-    enum { R=(C)&0xf };
-    return (T1)vec_sld((uint8x16_p)vec1, (uint8x16_p)vec2, R);
-#else
-    enum { R=(16-C)&0xf };
-    return (T1)vec_sld((uint8x16_p)vec2, (uint8x16_p)vec1, R);
-#endif
+    return 1 == vec_all_eq((uint32x4_p)vec1, (uint32x4_p)vec2);
+}
+
+/// \brief Compare two vectors
+/// \tparam T1 vector type
+/// \tparam T2 vector type
+/// \param vec1 the first vector
+/// \param vec2 the second vector
+/// \returns true if vec1 does not equal vec2, false otherwise
+template <class T1, class T2>
+inline bool VectorNotEqual(const T1& vec1, const T2& vec2)
+{
+    return 0 == vec_all_eq((uint32x4_p)vec1, (uint32x4_p)vec2);
 }
 
 #endif // POWER4 and above
@@ -296,10 +324,10 @@ inline uint32x4_p VectorLoadBE(const uint8_t src[16])
 #if defined(CRYPTOPP_XLC_VERSION)
     return (uint32x4_p)vec_xl_be(0, (byte*)src);
 #else
-# if defined(CRYPTOPP_LITTLE_ENDIAN)
-    return (uint32x4_p)Reverse(vec_vsx_ld(0, src));
-# else
+# if defined(CRYPTOPP_BIG_ENDIAN)
     return (uint32x4_p)vec_vsx_ld(0, src);
+# else
+    return (uint32x4_p)Reverse(vec_vsx_ld(0, src));
 # endif
 #endif
 }
@@ -317,10 +345,10 @@ inline uint32x4_p VectorLoadBE(int off, const uint8_t src[16])
 #if defined(CRYPTOPP_XLC_VERSION)
     return (uint32x4_p)vec_xl_be(off, (byte*)src);
 #else
-# if defined(CRYPTOPP_LITTLE_ENDIAN)
-    return (uint32x4_p)Reverse(vec_vsx_ld(off, src));
-# else
+# if defined(CRYPTOPP_BIG_ENDIAN)
     return (uint32x4_p)vec_vsx_ld(off, src);
+# else
+    return (uint32x4_p)Reverse(vec_vsx_ld(off, src));
 # endif
 #endif
 }
@@ -371,10 +399,10 @@ inline void VectorStoreBE(const T& src, uint8_t dest[16])
 #if defined(CRYPTOPP_XLC_VERSION)
     vec_xst_be((uint8x16_p)src, 0, dest);
 #else
-# if defined(CRYPTOPP_LITTLE_ENDIAN)
-    vec_vsx_st(Reverse((uint8x16_p)src), 0, dest);
-# else
+# if defined(CRYPTOPP_BIG_ENDIAN)
     vec_vsx_st((uint8x16_p)src, 0, dest);
+# else
+    vec_vsx_st((uint8x16_p)Reverse(src), 0, dest);
 # endif
 #endif
 }
@@ -395,10 +423,10 @@ inline void VectorStoreBE(const T& src, int off, uint8_t dest[16])
 #if defined(CRYPTOPP_XLC_VERSION)
     vec_xst_be((uint8x16_p)src, off, dest);
 #else
-# if defined(CRYPTOPP_LITTLE_ENDIAN)
-    vec_vsx_st(Reverse((uint8x16_p)src), off, dest);
-# else
+# if defined(CRYPTOPP_BIG_ENDIAN)
     vec_vsx_st((uint8x16_p)src, off, dest);
+# else
+    vec_vsx_st((uint8x16_p)Reverse(src), off, dest);
 # endif
 #endif
 }
@@ -472,37 +500,29 @@ inline uint32x4_p VectorLoadBE(const uint8_t src[16])
 #if defined(CRYPTOPP_BIG_ENDIAN)
     return (uint32x4_p)VectorLoad(src);
 #else
-    const uint8x16_p data = (uint8x16_p)VectorLoad(src);
-    const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
-    return (uint32x4_p)vec_perm(data, data, mask);
+    return (uint32x4_p)Reverse(VectorLoad(src));
 #endif
 }
 
-inline void VectorStore(const uint32x4_p data, byte dest[16])
+template <class T>
+inline void VectorStore(const T& data, byte dest[16])
 {
-#if defined(CRYPTOPP_LITTLE_ENDIAN)
-    const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
-    const uint8x16_p t1 = (uint8x16_p)vec_perm(data, data, mask);
-#else
-    const uint8x16_p t1 = (uint8x16_p)data;
-#endif
-
     if (IsAlignedOn(dest, 16))
     {
-        vec_st(t1, 0, dest);
+        vec_st((uint8x16_p)data, 0, dest);
    }
    else
    {
        // http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
-        const uint8x16_p t2 = vec_perm(t1, t1, vec_lvsr(0, dest));
-        vec_ste((uint8x16_p) t2,  0, (unsigned char*) dest);
-        vec_ste((uint16x8_p) t2,  1, (unsigned short*)dest);
-        vec_ste((uint32x4_p) t2,  3, (unsigned int*)  dest);
-        vec_ste((uint32x4_p) t2,  4, (unsigned int*)  dest);
-        vec_ste((uint32x4_p) t2,  8, (unsigned int*)  dest);
-        vec_ste((uint32x4_p) t2, 12, (unsigned int*)  dest);
-        vec_ste((uint16x8_p) t2, 14, (unsigned short*)dest);
-        vec_ste((uint8x16_p) t2, 15, (unsigned char*) dest);
+        uint8x16_p perm = (uint8x16_p)vec_perm(data, data, vec_lvsr(0, dest));
+        vec_ste((uint8x16_p) perm,  0, (unsigned char*) dest);
+        vec_ste((uint16x8_p) perm,  1, (unsigned short*)dest);
+        vec_ste((uint32x4_p) perm,  3, (unsigned int*)  dest);
+        vec_ste((uint32x4_p) perm,  4, (unsigned int*)  dest);
+        vec_ste((uint32x4_p) perm,  8, (unsigned int*)  dest);
+        vec_ste((uint32x4_p) perm, 12, (unsigned int*)  dest);
+        vec_ste((uint16x8_p) perm, 14, (unsigned short*)dest);
+        vec_ste((uint8x16_p) perm, 15, (unsigned char*) dest);
    }
 }
@@ -521,8 +541,7 @@ inline void VectorStoreBE(const T& src, uint8_t dest[16])
 #if defined(CRYPTOPP_BIG_ENDIAN)
     VectorStore(src, dest);
 #else
-    const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
-    VectorStore(vec_perm(src, src, mask), dest);
+    VectorStore(Reverse(src), dest);
 #endif
 }
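
A minimal usage sketch follows; it is not part of the patch above. It assumes a Crypto++ build on PowerPC with AltiVec enabled, so that ppc-simd.h provides uint8x16_p, VectorLoad/VectorLoadBE/VectorStoreBE, the VectorShiftLeft/VectorShiftRight templates with the new <0>/<16> specializations, and the new VectorEqual/VectorNotEqual helpers in namespace CryptoPP. The buffer names and values are illustrative only.

// Hypothetical usage sketch (not part of the patch). Assumes a POWER
// machine and a compiler with AltiVec enabled, e.g. GCC with -maltivec.
#include "ppc-simd.h"
#include <cassert>
#include <cstdint>
#include <cstring>

int main()
{
    using namespace CryptoPP;

    const uint8_t buf[16] = {0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15};

    // VectorLoad returns a uint32x4_p; cast to the byte vector type.
    const uint8x16_p a = (uint8x16_p)VectorLoad(buf);

    // The new <0> and <16> full specializations are identity operations.
    assert(VectorEqual(a, VectorShiftLeft<0>(a)));
    assert(VectorEqual(a, VectorShiftRight<16>(a)));

    // The byte shifts behave as if on a big endian machine on either
    // endian: shifting left by 4 bytes and then right by 4 bytes leaves
    // zeros in the first four byte lanes, so the result differs from a.
    const uint8x16_p b = VectorShiftRight<4>(VectorShiftLeft<4>(a));
    assert(VectorNotEqual(a, b));

    // VectorLoadBE/VectorStoreBE round-trip big endian byte order on
    // both big and little endian hosts.
    uint8_t out[16] = {0};
    VectorStoreBE(VectorLoadBE(buf), out);
    assert(std::memcmp(buf, out, 16) == 0);

    return 0;
}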