Rework Makefile and ppc-simd.h for XLC and LLVM front-end changes

This commit is contained in:
Jeffrey Walton 2018-08-05 05:39:42 -04:00
parent da00422d3c
commit 1dd0e321a6
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
5 changed files with 492 additions and 446 deletions

View File

@ -101,6 +101,16 @@ ifeq ($(wildcard adhoc.cpp),)
$(shell cp adhoc.cpp.proto adhoc.cpp)
endif
# Fixup AIX: getconf reports the kernel bit mode, which tells us whether
# to treat the build as PPC64 or PPC32 for the flag tests further below.
ifeq ($(IS_AIX),1)
# ':=' so the getconf shell command runs once at parse time instead of
# on every expansion of BITNESS.
BITNESS := $(shell getconf KERNEL_BITMODE)
ifeq ($(BITNESS),64)
IS_PPC64=1
else
IS_PPC32=1
endif
endif
###########################################################
##### General Variables #####
###########################################################
@ -400,63 +410,93 @@ ifeq ($(IS_ARMV8),1)
endif
endif
# PowerPC and PowerPC-64. Altivec is available with Power4
ifneq ($(IS_PPC32)$(IS_PPC64)$(IS_AIX),000)
# PowerPC and PowerPC-64. Altivec is available with Power4.
# The tests below are crafted for IBM XLC and the LLVM front-end.
# XLC/LLVM only supplies POWER8, so we set the flags for XLC/LLVM
# and lower them if only POWER7 or ALTIVEC is available. I've got a
# feeling LLVM is going to cause a lot of trouble.
ifneq ($(IS_PPC32)$(IS_PPC64),00)
HAVE_POWER8 = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -mcpu=power8 -maltivec -dM -E adhoc.cpp 2>&1 | $(GREP) -i -c -E '_ARCH_PWR8|_ARCH_PWR9|__CRYPTO')
ifneq ($(HAVE_POWER8),0)
POWER8_FLAG = -mcpu=power8 -maltivec
AES_FLAG = $(POWER8_FLAG)
GCM_FLAG = $(POWER8_FLAG)
SHA_FLAG = $(POWER8_FLAG)
SM4_FLAG = $(POWER8_FLAG)
endif
# GCC and some compatibles
HAVE_POWER7 = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -mcpu=power7 -maltivec -dM -E adhoc.cpp 2>&1 | $(GREP) -i -c '_ARCH_PWR7')
ifneq ($(HAVE_POWER7),0)
POWER7_FLAG = -mcpu=power7 -maltivec
ARIA_FLAG = $(POWER7_FLAG)
BLAKE2_FLAG = $(POWER7_FLAG)
CHAM_FLAG = $(POWER7_FLAG)
LEA_FLAG = $(POWER7_FLAG)
SIMON_FLAG = $(POWER7_FLAG)
SPECK_FLAG = $(POWER7_FLAG)
SIMECK_FLAG = $(POWER7_FLAG)
endif
# GCC and some compatibles
HAVE_ALTIVEC = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -mcpu=power4 -maltivec -dM -E adhoc.cpp 2>&1 | $(GREP) -i -c '__ALTIVEC__')
ifneq ($(HAVE_ALTIVEC),0)
ALTIVEC_FLAG = -mcpu=power4 -maltivec
ARIA_FLAG = -mcpu=power4 -maltivec
BLAKE2_FLAG = -mcpu=power4 -maltivec
CHAM_FLAG = -mcpu=power4 -maltivec
LEA_FLAG = -mcpu=power4 -maltivec
SIMON_FLAG = -mcpu=power4 -maltivec
SPECK_FLAG = -mcpu=power4 -maltivec
SIMECK_FLAG = -mcpu=power4 -maltivec
SM4_FLAG = -mcpu=power7 -maltivec
endif
# GCC and some compatibles
HAVE_CRYPTO = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -mcpu=power8 -maltivec -dM -E adhoc.cpp 2>&1 | $(GREP) -i -c -E '_ARCH_PWR8|_ARCH_PWR9|__CRYPTO')
ifneq ($(HAVE_CRYPTO),0)
ALTIVEC_FLAG = -mcpu=power8 -maltivec
AES_FLAG = -mcpu=power8 -maltivec
GCM_FLAG = -mcpu=power8 -maltivec
SHA_FLAG = -mcpu=power8 -maltivec
CHAM_FLAG = -mcpu=power8 -maltivec
LEA_FLAG = -mcpu=power8 -maltivec
SIMON_FLAG = -mcpu=power8 -maltivec
SPECK_FLAG = -mcpu=power8 -maltivec
SIMECK_FLAG = -mcpu=power8 -maltivec
SM4_FLAG = -mcpu=power8 -maltivec
endif
# IBM XL C/C++
HAVE_ALTIVEC = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr7 -qaltivec -E adhoc.cpp 2>&1 | $(GREP) -i -c '__ALTIVEC__')
HAVE_POWER8 = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr8 -qaltivec -E adhoc.cpp 2>&1 | $(GREP) -i -c -E '_ARCH_PWR8|_ARCH_PWR9|__CRYPTO')
ifneq ($(HAVE_POWER8),0)
POWER8_FLAG = -qarch=pwr8 -qaltivec
AES_FLAG = $(POWER8_FLAG)
GCM_FLAG = $(POWER8_FLAG)
SHA_FLAG = $(POWER8_FLAG)
SM4_FLAG = $(POWER8_FLAG)
endif
# IBM XL C/C++
HAVE_POWER7 = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr7 -qaltivec -E adhoc.cpp 2>&1 | $(GREP) -i -c -E '_ARCH_PWR7')
ifneq ($(HAVE_POWER7),0)
POWER7_FLAG = -qarch=pwr7 -qaltivec
ARIA_FLAG = $(POWER7_FLAG)
BLAKE2_FLAG = $(POWER7_FLAG)
CHAM_FLAG = $(POWER7_FLAG)
LEA_FLAG = $(POWER7_FLAG)
SIMECK_FLAG = $(POWER7_FLAG)
SIMON_FLAG = $(POWER7_FLAG)
SPECK_FLAG = $(POWER7_FLAG)
endif
# IBM XL C/C++
HAVE_ALTIVEC = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr6 -qaltivec -E adhoc.cpp 2>&1 | $(GREP) -i -c '__ALTIVEC__')
ifneq ($(HAVE_ALTIVEC),0)
ALTIVEC_FLAG = -qarch=pwr7 -qaltivec
ARIA_FLAG = -qarch=pwr7 -qaltivec
BLAKE2_FLAG = -qarch=pwr7 -qaltivec
CHAM_FLAG = -qarch=pwr7 -qaltivec
LEA_FLAG = -qarch=pwr7 -qaltivec
SIMECK_FLAG = -qarch=pwr7 -qaltivec
SIMON_FLAG = -qarch=pwr7 -qaltivec
SPECK_FLAG = -qarch=pwr7 -qaltivec
SM4_FLAG = -qarch=pwr7 -qaltivec
ALTIVEC_FLAG = -qarch=pwr6 -qaltivec
endif
# IBM XL C/C++
HAVE_CRYPTO = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -qarch=pwr8 -qaltivec -E adhoc.cpp 2>&1 | $(GREP) -i -c -E '_ARCH_PWR8|_ARCH_PWR9|__CRYPTO')
ifneq ($(HAVE_CRYPTO),0)
ALTIVEC_FLAG = -qarch=pwr8 -qaltivec
AES_FLAG = -qarch=pwr8 -qaltivec
GCM_FLAG = -qarch=pwr8 -qaltivec
SHA_FLAG = -qarch=pwr8 -qaltivec
ARIA_FLAG = -qarch=pwr8 -qaltivec
BLAKE2_FLAG = -qarch=pwr8 -qaltivec
CHAM_FLAG = -qarch=pwr8 -qaltivec
LEA_FLAG = -qarch=pwr8 -qaltivec
SIMECK_FLAG = -qarch=pwr8 -qaltivec
SIMON_FLAG = -qarch=pwr8 -qaltivec
SPECK_FLAG = -qarch=pwr8 -qaltivec
SM4_FLAG = -qarch=pwr8 -qaltivec
# LLVM front-ends only provide Power8. It really jams us up
# for ppc-simd.cpp, which needs ALTIVEC/POWER4. We have similar
# problems with {lea|cham|simon|speck|...}-simd.cpp and POWER7.
HAVE_LLVM = $(shell $(CXX) $(CXXFLAGS) -qshowmacros -E adhoc.cpp 2>&1 | $(GREP) -i -c '__llvm__')
ifneq ($(HAVE_LLVM),0)
POWER7_FLAG = $(POWER8_FLAG)
ARIA_FLAG = $(POWER8_FLAG)
BLAKE2_FLAG = $(POWER8_FLAG)
CHAM_FLAG = $(POWER8_FLAG)
LEA_FLAG = $(POWER8_FLAG)
SIMECK_FLAG = $(POWER8_FLAG)
SIMON_FLAG = $(POWER8_FLAG)
SPECK_FLAG = $(POWER8_FLAG)
ALTIVEC_FLAG = $(POWER8_FLAG)
endif
# If a feature flag is still empty at this point then none of the
# compiler probes above succeeded for that ISA level, so tell config.h
# to disable the corresponding code paths outright.
ifeq ($(ALTIVEC_FLAG),)
CXXFLAGS += -DCRYPTOPP_DISABLE_ALTIVEC
endif
ifeq ($(POWER7_FLAG),)
CXXFLAGS += -DCRYPTOPP_DISABLE_POWER7
endif
ifeq ($(POWER8_FLAG),)
CXXFLAGS += -DCRYPTOPP_DISABLE_POWER8
endif
endif

View File

@ -6,14 +6,6 @@
// is needed because additional CXXFLAGS are required to enable the
// appropriate instructions sets in some build configurations.
// TODO: Bob Wilkinson reported we are misdetecting CRYPTOPP_POWER8_AVAILABLE.
// The problem is, the updated compiler supports them but the down-level
// assembler and linker do not. We will probably need to fix it through
// the makefile, similar to the way x86 AES and SHA are handled. For the time
// being CRYPTOPP_DISABLE_POWER8 will have to be applied manually. Another
// twist is, we don't have access to a test machine and it must be fixed
// for two compilers (IBM XL C/C++ and GCC). Ugh...
#include "pch.h"
#include "config.h"
#include "stdcpp.h"
@ -53,7 +45,7 @@ bool CPU_ProbeAltivec()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
return false;
#elif (CRYPTOPP_ALTIVEC_AVAILABLE) || (CRYPTOPP_POWER7_AVAILABLE) || (CRYPTOPP_POWER8_AVAILABLE)
#elif (CRYPTOPP_ALTIVEC_AVAILABLE)
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
// longjmp and clobber warnings. Volatile is required.
@ -96,239 +88,5 @@ bool CPU_ProbeAltivec()
#endif // CRYPTOPP_ALTIVEC_AVAILABLE
}
// Returns true if the CPU executes POWER7 unaligned VSX load/store
// instructions without raising SIGILL. The probe installs a SIGILL
// handler, executes the instructions, and longjmp's back on failure.
bool CPU_ProbePower7()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_POWER7_AVAILABLE) || (CRYPTOPP_POWER8_AVAILABLE)
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile int result = false;
    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
        return false;
    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        // POWER7 added unaligned loads and store operations
        byte b1[19] = {255, 255, 255, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, b2[17];
        // Specifically call the VSX loads and stores
#if defined(__xlc__) || defined(__xlC__)
        vec_xst(vec_xl(0, b1+3), 0, b2+1);
#else
        vec_vsx_st(vec_vsx_ld(0, b1+3), 0, b2+1);
#endif
        result = (0 == std::memcmp(b1+3, b2+1, 16));
    }
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# else
    // BUGFIX: previously control fell off the end of this non-void function
    // when GNU-style inline assembly was unavailable (undefined behavior).
    return false;
# endif
#else
    return false;
#endif  // CRYPTOPP_POWER7_AVAILABLE
}
// Returns true if the CPU executes POWER8 64-bit SIMD (VSX) instructions
// without raising SIGILL. Uses the same signal-handler probe pattern as
// CPU_ProbePower7.
bool CPU_ProbePower8()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_POWER8_AVAILABLE)
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile int result = true;
    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
        return false;
    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        // POWER8 added 64-bit SIMD operations
        const word64 x = W64LIT(0xffffffffffffffff);
        word64 w1[2] = {x, x}, w2[2] = {4, 6}, w3[2];
        // Specifically call the VSX loads and stores
#if defined(__xlc__) || defined(__xlC__)
        const uint64x2_p v1 = (uint64x2_p)vec_xl(0, (byte*)w1);
        const uint64x2_p v2 = (uint64x2_p)vec_xl(0, (byte*)w2);
        const uint64x2_p v3 = vec_add(v1, v2);  // 64-bit add
        vec_xst((uint8x16_p)v3, 0, (byte*)w3);
#else
        const uint64x2_p v1 = (uint64x2_p)vec_vsx_ld(0, (byte*)w1);
        const uint64x2_p v2 = (uint64x2_p)vec_vsx_ld(0, (byte*)w2);
        const uint64x2_p v3 = vec_add(v1, v2);  // 64-bit add
        vec_vsx_st((uint8x16_p)v3, 0, (byte*)w3);
#endif
        // Relies on integer wrap
        result = (w3[0] == 3 && w3[1] == 5);
    }
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# else
    // BUGFIX: previously control fell off the end of this non-void function
    // when GNU-style inline assembly was unavailable (undefined behavior).
    return false;
# endif
#else
    return false;
#endif  // CRYPTOPP_POWER8_AVAILABLE
}
// Returns true if the CPU executes the POWER8 in-core AES instructions
// (vcipher et al., via the Vector* wrappers) without raising SIGILL.
bool CPU_ProbeAES()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_POWER8_AVAILABLE)
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile int result = true;
    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
        return false;
    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        byte key[16] = {0xA0, 0xFA, 0xFE, 0x17, 0x88, 0x54, 0x2c, 0xb1,
                        0x23, 0xa3, 0x39, 0x39, 0x2a, 0x6c, 0x76, 0x05};
        byte state[16] = {0x19, 0x3d, 0xe3, 0xb3, 0xa0, 0xf4, 0xe2, 0x2b,
                          0x9a, 0xc6, 0x8d, 0x2a, 0xe9, 0xf8, 0x48, 0x08};
        byte r[16] = {255}, z[16] = {};
        uint8x16_p k = (uint8x16_p)VectorLoad(0, key);
        uint8x16_p s = (uint8x16_p)VectorLoad(0, state);
        s = VectorEncrypt(s, k);
        s = VectorEncryptLast(s, k);
        s = VectorDecrypt(s, k);
        s = VectorDecryptLast(s, k);
        VectorStore(s, r);
        // Round-tripping encrypt/decrypt should leave a nonzero state
        result = (0 != std::memcmp(r, z, 16));
    }
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# else
    // BUGFIX: previously control fell off the end of this non-void function
    // when GNU-style inline assembly was unavailable (undefined behavior).
    return false;
# endif
#else
    return false;
#endif  // CRYPTOPP_POWER8_AVAILABLE (BUGFIX: comment said ALTIVEC)
}
// Returns true if the CPU executes the POWER8 SHA-256 sigma instructions
// (via the VectorSHA256 wrapper) without raising SIGILL.
bool CPU_ProbeSHA256()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_POWER8_AVAILABLE)
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile int result = false;
    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
        return false;
    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        // All sigma functions of the zero vector are zero
        byte r[16], z[16] = {0};
        uint8x16_p x = ((uint8x16_p){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0});
        x = VectorSHA256<0,0>(x);
        x = VectorSHA256<0,1>(x);
        x = VectorSHA256<1,0>(x);
        x = VectorSHA256<1,1>(x);
        VectorStore(x, r);
        result = (0 == std::memcmp(r, z, 16));
    }
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# else
    // BUGFIX: previously control fell off the end of this non-void function
    // when GNU-style inline assembly was unavailable (undefined behavior).
    return false;
# endif
#else
    return false;
#endif  // CRYPTOPP_POWER8_AVAILABLE (BUGFIX: comment said ALTIVEC)
}
// Returns true if the CPU executes the POWER8 SHA-512 sigma instructions
// (via the VectorSHA512 wrapper) without raising SIGILL.
bool CPU_ProbeSHA512()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_POWER8_AVAILABLE)
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile int result = false;
    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
        return false;
    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        // All sigma functions of the zero vector are zero
        byte r[16], z[16] = {0};
        uint8x16_p x = ((uint8x16_p){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0});
        x = VectorSHA512<0,0>(x);
        x = VectorSHA512<0,1>(x);
        x = VectorSHA512<1,0>(x);
        x = VectorSHA512<1,1>(x);
        VectorStore(x, r);
        result = (0 == std::memcmp(r, z, 16));
    }
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# else
    // BUGFIX: previously control fell off the end of this non-void function
    // when GNU-style inline assembly was unavailable (undefined behavior).
    return false;
# endif
#else
    return false;
#endif  // CRYPTOPP_POWER8_AVAILABLE
}
# endif // CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64
NAMESPACE_END

View File

@ -19,6 +19,26 @@
#include "config.h"
#include "misc.h"
// We are boxed into undefining macros like CRYPTOPP_POWER8_AVAILABLE.
// The CRYPTOPP_POWER*_AVAILABLE macros are set from compiler versions
// because they are needed for both the SIMD and non-SIMD files. When the
// SIMD file is compiled it may only get -mcpu=power4 or -mcpu=power7, so
// the POWER7 or POWER8 ISA is not actually available when this header is
// included. Trust the architecture macros the compiler itself defines.
#if !defined(__ALTIVEC__)
# undef CRYPTOPP_ALTIVEC_AVAILABLE
#endif
#if !defined(_ARCH_PWR7)
# undef CRYPTOPP_POWER7_AVAILABLE
#endif
// BUGFIX: the in-core crypto macro is spelled __CRYPTO (two leading
// underscores); _CRYPTO is never defined, which made this test stricter
// than the makefile grep and the guards in the other translation units.
#if !(defined(_ARCH_PWR8) || defined(_ARCH_PWR9) || defined(__CRYPTO))
# undef CRYPTOPP_POWER8_AVAILABLE
# undef CRYPTOPP_POWER8_AES_AVAILABLE
# undef CRYPTOPP_POWER8_SHA_AVAILABLE
# undef CRYPTOPP_POWER8_PMULL_AVAILABLE
#endif
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
# include <altivec.h>
# undef vector
@ -28,84 +48,112 @@
NAMESPACE_BEGIN(CryptoPP)
// Datatypes
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
typedef __vector unsigned char uint8x16_p;
typedef __vector unsigned short uint16x8_p;
typedef __vector unsigned int uint32x4_p;
#if defined(CRYPTOPP_POWER8_AVAILABLE)
#if defined(CRYPTOPP_POWER8_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
typedef __vector unsigned long long uint64x2_p;
#endif
#endif // ALTIVEC/POWER4 datatypes
#endif // CRYPTOPP_ALTIVEC_AVAILABLE
// POWER4 and above
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) && !defined(CRYPTOPP_POWER7_AVAILABLE)
inline uint32x4_p VectorLoad(const byte src[16])
/// \brief Reverse a vector
/// \tparam T vector type
/// \param src the vector
/// \details Reverse() endian swaps the bytes in a vector
/// \sa Reverse(), VectorLoadBE(), VectorLoad(), VectorLoadKey()
/// \since Crypto++ 6.0
template <class T>
inline T Reverse(const T& src)
{
uint8x16_p data;
if (IsAlignedOn(src, 16))
{
data = vec_ld(0, src);
}
else
{
// http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
const uint8x16_p perm = vec_lvsl(0, src);
const uint8x16_p low = vec_ld(0, src);
const uint8x16_p high = vec_ld(15, src);
data = vec_perm(low, high, perm);
}
#if defined(CRYPTOPP_BIG_ENDIAN)
return (uint32x4_p)data;
#else
const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
return (uint32x4_p)vec_perm(data, data, mask);
#endif
return vec_perm(src, src, mask);
}
inline void VectorStore(const uint32x4_p data, byte dest[16])
/// \brief Permutes two vectors
/// \tparam T1 vector type
/// \tparam T2 vector type
/// \param vec1 the first vector
/// \param vec2 the second vector
/// \param mask vector mask
/// \details VectorPermute returns a new vector from vec1 and vec2
/// based on mask. mask is an uint8x16_p type vector. The return
/// vector is the same type as vec1.
/// \since Crypto++ 6.0
template <class T1, class T2>
inline T1 VectorPermute(const T1& vec1, const T1& vec2, const T2& mask)
{
return (T1)vec_perm(vec1, vec2, (uint8x16_p)mask);
}
/// \brief XOR two vectors
/// \tparam T1 vector type
/// \tparam T2 vector type
/// \param vec1 the first vector
/// \param vec2 the second vector
/// \details vec2 is cast to the type of vec1 before the XOR. The return
///   vector has the same type as vec1.
/// \since Crypto++ 6.0
template <class T1, class T2>
inline T1 VectorXor(const T1& vec1, const T2& vec2)
{
    const T1 rhs = (T1)vec2;
    return (T1)vec_xor(vec1, rhs);
}
/// \brief Add two vectors
/// \tparam T1 vector type
/// \tparam T2 vector type
/// \param vec1 the first vector
/// \param vec2 the second vector
/// \details vec2 is cast to the type of vec1 before the add. The return
///   vector has the same type as vec1.
/// \since Crypto++ 6.0
template <class T1, class T2>
inline T1 VectorAdd(const T1& vec1, const T2& vec2)
{
    const T1 rhs = (T1)vec2;
    return (T1)vec_add(vec1, rhs);
}
/// \brief Shift two vectors left
/// \tparam C shift byte count
/// \tparam T1 vector type
/// \tparam T2 vector type
/// \param vec1 the first vector
/// \param vec2 the second vector
/// \details VectorShiftLeft() concatenates vec1 and vec2 and returns a
/// new vector after shifting the concatenation by the specified number
/// of bytes. Both vec1 and vec2 are cast to uint8x16_p. The return
/// vector is the same type as vec1.
/// \details On big endian machines VectorShiftLeft() is <tt>vec_sld(a, b,
/// c)</tt>. On little endian machines VectorShiftLeft() is translated to
/// <tt>vec_sld(b, a, 16-c)</tt>. You should always call the function as
/// if on a big endian machine as shown below.
/// <pre>
/// uint8x16_p r0 = {0};
/// uint8x16_p r1 = VectorLoad(ptr);
/// uint8x16_p r5 = VectorShiftLeft<12>(r0, r1);
/// </pre>
/// \sa <A HREF="https://stackoverflow.com/q/46341923/608639">Is vec_sld
/// endian sensitive?</A> on Stack Overflow
/// \since Crypto++ 6.0
template <unsigned int C, class T1, class T2>
inline T1 VectorShiftLeft(const T1& vec1, const T2& vec2)
{
#if defined(CRYPTOPP_LITTLE_ENDIAN)
const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
const uint8x16_p t1 = (uint8x16_p)vec_perm(data, data, mask);
return (T1)vec_sld((uint8x16_p)vec2, (uint8x16_p)vec1, 16-C);
#else
const uint8x16_p t1 = (uint8x16_p)data;
return (T1)vec_sld((uint8x16_p)vec1, (uint8x16_p)vec2, C);
#endif
if (IsAlignedOn(dest, 16))
{
vec_st(t1, 0, dest);
}
else
{
// http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
const uint8x16_p t2 = vec_perm(t1, t1, vec_lvsr(0, dest));
vec_ste((uint8x16_p) t2, 0, (unsigned char*) dest);
vec_ste((uint16x8_p) t2, 1, (unsigned short*)dest);
vec_ste((uint32x4_p) t2, 3, (unsigned int*) dest);
vec_ste((uint32x4_p) t2, 4, (unsigned int*) dest);
vec_ste((uint32x4_p) t2, 8, (unsigned int*) dest);
vec_ste((uint32x4_p) t2, 12, (unsigned int*) dest);
vec_ste((uint16x8_p) t2, 14, (unsigned short*)dest);
vec_ste((uint8x16_p) t2, 15, (unsigned char*) dest);
}
}
// XOR two uint32x4_p vectors. Non-template Altivec-only overload used
// when the templated VectorXor is not in scope.
inline uint32x4_p VectorXor(const uint32x4_p vec1, const uint32x4_p vec2)
{
return vec_xor(vec1, vec2);
}
// Add two uint32x4_p vectors. Non-template Altivec-only overload used
// when the templated VectorAdd is not in scope.
inline uint32x4_p VectorAdd(const uint32x4_p vec1, const uint32x4_p vec2)
{
return vec_add(vec1, vec2);
}
#endif
#endif // POWER4 and above
// POWER7/POWER4 load and store
#if defined(CRYPTOPP_POWER7_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
/// \brief Reverse a 16-byte array
@ -124,19 +172,6 @@ inline void ReverseByteArrayLE(byte src[16])
#endif
}
/// \brief Reverse a vector
/// \tparam T vector type
/// \param src the vector
/// \details Reverse() endian swaps the bytes in a vector
/// \sa Reverse(), VectorLoadBE(), VectorLoad(), VectorLoadKey()
/// \since Crypto++ 6.0
template <class T>
inline T Reverse(const T& src)
{
const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
return vec_perm(src, src, mask);
}
/// \brief Loads a vector from a byte array
/// \param src the byte array
/// \details Loads a vector in big endian format from a byte array.
@ -346,86 +381,65 @@ inline void VectorStore(const T& src, int off, byte dest[16])
#endif
}
/// \brief Permutes two vectors
/// \tparam T1 vector type
/// \tparam T2 vector type
/// \param vec1 the first vector
/// \param vec2 the second vector
/// \param mask vector mask
/// \details VectorPermute returns a new vector from vec1 and vec2
/// based on mask. mask is an uint8x16_p type vector. The return
/// vector is the same type as vec1.
/// \since Crypto++ 6.0
template <class T1, class T2>
inline T1 VectorPermute(const T1& vec1, const T1& vec2, const T2& mask)
#else // not CRYPTOPP_POWER7_AVAILABLE
// POWER7 is not available. Slow Altivec loads and stores.
inline uint32x4_p VectorLoad(const byte src[16])
{
return (T1)vec_perm(vec1, vec2, (uint8x16_p)mask);
uint8x16_p data;
if (IsAlignedOn(src, 16))
{
data = vec_ld(0, src);
}
else
{
// http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
const uint8x16_p perm = vec_lvsl(0, src);
const uint8x16_p low = vec_ld(0, src);
const uint8x16_p high = vec_ld(15, src);
data = vec_perm(low, high, perm);
}
/// \brief XOR two vectors
/// \tparam T1 vector type
/// \tparam T2 vector type
/// \param vec1 the first vector
/// \param vec2 the second vector
/// \details VectorXor returns a new vector from vec1 and vec2. The return
/// vector is the same type as vec1.
/// \since Crypto++ 6.0
template <class T1, class T2>
inline T1 VectorXor(const T1& vec1, const T2& vec2)
{
return (T1)vec_xor(vec1, (T1)vec2);
}
/// \brief Add two vectors
/// \tparam T1 vector type
/// \tparam T2 vector type
/// \param vec1 the first vector
/// \param vec2 the second vector
/// \details VectorAdd returns a new vector from vec1 and vec2.
/// vec2 is cast to the same type as vec1. The return vector
/// is the same type as vec1.
/// \since Crypto++ 6.0
template <class T1, class T2>
inline T1 VectorAdd(const T1& vec1, const T2& vec2)
{
return (T1)vec_add(vec1, (T1)vec2);
}
/// \brief Shift two vectors left
/// \tparam C shift byte count
/// \tparam T1 vector type
/// \tparam T2 vector type
/// \param vec1 the first vector
/// \param vec2 the second vector
/// \details VectorShiftLeft() concatenates vec1 and vec2 and returns a
/// new vector after shifting the concatenation by the specified number
/// of bytes. Both vec1 and vec2 are cast to uint8x16_p. The return
/// vector is the same type as vec1.
/// \details On big endian machines VectorShiftLeft() is <tt>vec_sld(a, b,
/// c)</tt>. On little endian machines VectorShiftLeft() is translated to
/// <tt>vec_sld(b, a, 16-c)</tt>. You should always call the function as
/// if on a big endian machine as shown below.
/// <pre>
/// uint8x16_p r0 = {0};
/// uint8x16_p r1 = VectorLoad(ptr);
/// uint8x16_p r5 = VectorShiftLeft<12>(r0, r1);
/// </pre>
/// \sa <A HREF="https://stackoverflow.com/q/46341923/608639">Is vec_sld
/// endian sensitive?</A> on Stack Overflow
/// \since Crypto++ 6.0
template <unsigned int C, class T1, class T2>
inline T1 VectorShiftLeft(const T1& vec1, const T2& vec2)
{
#if defined(CRYPTOPP_LITTLE_ENDIAN)
return (T1)vec_sld((uint8x16_p)vec2, (uint8x16_p)vec1, 16-C);
#if defined(CRYPTOPP_BIG_ENDIAN)
return (uint32x4_p)data;
#else
return (T1)vec_sld((uint8x16_p)vec1, (uint8x16_p)vec2, C);
const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
return (uint32x4_p)vec_perm(data, data, mask);
#endif
}
#endif // CRYPTOPP_POWER7_AVAILABLE
inline void VectorStore(const uint32x4_p data, byte dest[16])
{
#if defined(CRYPTOPP_LITTLE_ENDIAN)
const uint8x16_p mask = {15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0};
const uint8x16_p t1 = (uint8x16_p)vec_perm(data, data, mask);
#else
const uint8x16_p t1 = (uint8x16_p)data;
#endif
#if defined(CRYPTOPP_POWER8_AES_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
if (IsAlignedOn(dest, 16))
{
vec_st(t1, 0, dest);
}
else
{
// http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
const uint8x16_p t2 = vec_perm(t1, t1, vec_lvsr(0, dest));
vec_ste((uint8x16_p) t2, 0, (unsigned char*) dest);
vec_ste((uint16x8_p) t2, 1, (unsigned short*)dest);
vec_ste((uint32x4_p) t2, 3, (unsigned int*) dest);
vec_ste((uint32x4_p) t2, 4, (unsigned int*) dest);
vec_ste((uint32x4_p) t2, 8, (unsigned int*) dest);
vec_ste((uint32x4_p) t2, 12, (unsigned int*) dest);
vec_ste((uint16x8_p) t2, 14, (unsigned short*)dest);
vec_ste((uint8x16_p) t2, 15, (unsigned char*) dest);
}
}
#endif // POWER4/POWER7 load and store
// POWER8 crypto
#if defined(CRYPTOPP_POWER8_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
/// \brief One round of AES encryption
/// \tparam T1 vector type
@ -507,9 +521,9 @@ inline T1 VectorDecryptLast(const T1& state, const T2& key)
#endif
}
#endif // CRYPTOPP_POWER8_AES_AVAILABLE
#endif // POWER8 crypto
#if defined(CRYPTOPP_POWER8_SHA_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
#if defined(CRYPTOPP_POWER8_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
/// \brief SHA256 Sigma functions
/// \tparam func function
@ -551,7 +565,7 @@ inline T VectorSHA512(const T& vec)
#endif
}
#endif // CRYPTOPP_POWER8_SHA_AVAILABLE
#endif // POWER8 crypto
NAMESPACE_END

View File

@ -25,13 +25,6 @@
#include "misc.h"
#include "adv-simd.h"
// We set CRYPTOPP_POWER8_CRYPTO_AVAILABLE based on compiler version.
// If the crypto is not available, then we have to disable it here.
#if !(defined(__CRYPTO) || defined(_ARCH_PWR8) || defined(_ARCH_PWR9))
# undef CRYPTOPP_POWER8_CRYPTO_AVAILABLE
# undef CRYPTOPP_POWER8_AES_AVAILABLE
#endif
#if (CRYPTOPP_AESNI_AVAILABLE)
# include <smmintrin.h>
# include <wmmintrin.h>
@ -68,6 +61,8 @@ extern const char RIJNDAEL_SIMD_FNAME[] = __FILE__;
NAMESPACE_BEGIN(CryptoPP)
// ************************* Feature Probes ************************* //
#ifdef CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
extern "C" {
typedef void (*SigHandler)(int);
@ -142,6 +137,155 @@ bool CPU_ProbeAES()
}
#endif // ARM32 or ARM64
#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64)
// Returns true if the CPU executes POWER7 unaligned VSX load/store
// instructions without raising SIGILL. The probe installs a SIGILL
// handler, executes the instructions, and longjmp's back on failure.
bool CPU_ProbePower7()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_POWER7_AVAILABLE) || (CRYPTOPP_POWER8_AVAILABLE)
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile int result = false;
    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
        return false;
    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        // POWER7 added unaligned loads and store operations
        byte b1[19] = {255, 255, 255, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, b2[17];
        // Specifically call the VSX loads and stores
#if defined(__xlc__) || defined(__xlC__)
        vec_xst(vec_xl(0, b1+3), 0, b2+1);
#else
        vec_vsx_st(vec_vsx_ld(0, b1+3), 0, b2+1);
#endif
        result = (0 == std::memcmp(b1+3, b2+1, 16));
    }
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# else
    // BUGFIX: previously control fell off the end of this non-void function
    // when GNU-style inline assembly was unavailable (undefined behavior).
    return false;
# endif
#else
    return false;
#endif  // CRYPTOPP_POWER7_AVAILABLE
}
// Returns true if the CPU executes POWER8 64-bit SIMD (VSX) instructions
// without raising SIGILL. Uses the same signal-handler probe pattern as
// CPU_ProbePower7.
bool CPU_ProbePower8()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_POWER8_AVAILABLE)
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile int result = true;
    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
        return false;
    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        // POWER8 added 64-bit SIMD operations
        const word64 x = W64LIT(0xffffffffffffffff);
        word64 w1[2] = {x, x}, w2[2] = {4, 6}, w3[2];
        // Specifically call the VSX loads and stores
#if defined(__xlc__) || defined(__xlC__)
        const uint64x2_p v1 = (uint64x2_p)vec_xl(0, (byte*)w1);
        const uint64x2_p v2 = (uint64x2_p)vec_xl(0, (byte*)w2);
        const uint64x2_p v3 = vec_add(v1, v2);  // 64-bit add
        vec_xst((uint8x16_p)v3, 0, (byte*)w3);
#else
        const uint64x2_p v1 = (uint64x2_p)vec_vsx_ld(0, (byte*)w1);
        const uint64x2_p v2 = (uint64x2_p)vec_vsx_ld(0, (byte*)w2);
        const uint64x2_p v3 = vec_add(v1, v2);  // 64-bit add
        vec_vsx_st((uint8x16_p)v3, 0, (byte*)w3);
#endif
        // Relies on integer wrap
        result = (w3[0] == 3 && w3[1] == 5);
    }
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# else
    // BUGFIX: previously control fell off the end of this non-void function
    // when GNU-style inline assembly was unavailable (undefined behavior).
    return false;
# endif
#else
    return false;
#endif  // CRYPTOPP_POWER8_AVAILABLE
}
// Returns true if the CPU executes the POWER8 in-core AES instructions
// (vcipher et al., via the Vector* wrappers) without raising SIGILL.
bool CPU_ProbeAES()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_POWER8_AES_AVAILABLE)
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile int result = true;
    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
        return false;
    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        byte key[16] = {0xA0, 0xFA, 0xFE, 0x17, 0x88, 0x54, 0x2c, 0xb1,
                        0x23, 0xa3, 0x39, 0x39, 0x2a, 0x6c, 0x76, 0x05};
        byte state[16] = {0x19, 0x3d, 0xe3, 0xb3, 0xa0, 0xf4, 0xe2, 0x2b,
                          0x9a, 0xc6, 0x8d, 0x2a, 0xe9, 0xf8, 0x48, 0x08};
        byte r[16] = {255}, z[16] = {};
        uint8x16_p k = (uint8x16_p)VectorLoad(0, key);
        uint8x16_p s = (uint8x16_p)VectorLoad(0, state);
        s = VectorEncrypt(s, k);
        s = VectorEncryptLast(s, k);
        s = VectorDecrypt(s, k);
        s = VectorDecryptLast(s, k);
        VectorStore(s, r);
        // Round-tripping encrypt/decrypt should leave a nonzero state
        result = (0 != std::memcmp(r, z, 16));
    }
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# else
    // BUGFIX: previously control fell off the end of this non-void function
    // when GNU-style inline assembly was unavailable (undefined behavior).
    return false;
# endif
#else
    return false;
#endif  // CRYPTOPP_POWER8_AES_AVAILABLE
}
#endif // PPC32 or PPC64
// ***************************** ARMv8 ***************************** //
#if (CRYPTOPP_ARM_AES_AVAILABLE)

View File

@ -185,6 +185,96 @@ bool CPU_ProbeSHA2()
}
#endif // ARM32 or ARM64
#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64)
// Returns true if the CPU executes the POWER8 SHA-256 sigma instructions
// (via the VectorSHA256 wrapper) without raising SIGILL.
bool CPU_ProbeSHA256()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_POWER8_AVAILABLE)
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile int result = false;
    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
        return false;
    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        // All sigma functions of the zero vector are zero
        byte r[16], z[16] = {0};
        uint8x16_p x = ((uint8x16_p){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0});
        x = VectorSHA256<0,0>(x);
        x = VectorSHA256<0,1>(x);
        x = VectorSHA256<1,0>(x);
        x = VectorSHA256<1,1>(x);
        VectorStore(x, r);
        result = (0 == std::memcmp(r, z, 16));
    }
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# else
    // BUGFIX: previously control fell off the end of this non-void function
    // when GNU-style inline assembly was unavailable (undefined behavior).
    return false;
# endif
#else
    return false;
#endif  // CRYPTOPP_POWER8_AVAILABLE (BUGFIX: comment said ALTIVEC)
}
// Returns true if the CPU executes the POWER8 SHA-512 sigma instructions
// (via the VectorSHA512 wrapper) without raising SIGILL.
bool CPU_ProbeSHA512()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    return false;
#elif (CRYPTOPP_POWER8_AVAILABLE)
# if defined(CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY)
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile int result = false;
    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
        return false;
    if (setjmp(s_jmpSIGILL))
        result = false;
    else
    {
        // All sigma functions of the zero vector are zero
        byte r[16], z[16] = {0};
        uint8x16_p x = ((uint8x16_p){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0});
        x = VectorSHA512<0,0>(x);
        x = VectorSHA512<0,1>(x);
        x = VectorSHA512<1,0>(x);
        x = VectorSHA512<1,1>(x);
        VectorStore(x, r);
        result = (0 == std::memcmp(r, z, 16));
    }
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# else
    // BUGFIX: previously control fell off the end of this non-void function
    // when GNU-style inline assembly was unavailable (undefined behavior).
    return false;
# endif
#else
    return false;
#endif  // CRYPTOPP_POWER8_AVAILABLE
}
#endif // PPC32 or PPC64
// ***************** Intel x86 SHA ********************
// provided by sha.cpp, 16-byte aigned