ext-cryptopp/darn.cpp
Jeffrey Walton a0e21c77ae
Add LSH dynamic dispatch (PR #1032)
This commit adds dynamic dispatch to LSH. The implementation pivots on AVX2 and SSSE3.
2021-04-26 04:50:48 -04:00

237 lines
6.6 KiB
C++

// darn.cpp - written and placed in public domain by Jeffrey Walton
#include "pch.h"
#include "config.h"
#include "cryptlib.h"
#include "secblock.h"
#include "darn.h"
#include "cpu.h"
// At the moment only GCC 7.0 (and above) seems to support __builtin_darn()
// and __builtin_darn_32(). Clang 7.0 does not provide them, but it does
// support assembly instructions. XLC is unknown, but there are no hits when
// searching IBM's site. To cover more platforms we provide GCC inline
// assembly like we do with RDRAND and RDSEED. Platforms that don't support
// GCC inline assembly or the builtin will fail the compile.
// Inline assembler available in GCC 3.2 or above. For practical
// purposes we check for GCC 4.0 or above. GCC impostors claim
// to be GCC 4.2.1 so it will capture them, too. We exclude the
// Apple machines because they are not Power9 and use a slightly
// different syntax in their assembler.
#if ((__GNUC__ >= 4) || defined(__IBM_GCC_ASM)) && !defined(__APPLE__)
# define GCC_DARN_ASM_AVAILABLE 1
#endif
// warning C4702: unreachable code
#if CRYPTOPP_MSC_VERSION
# pragma warning(disable: 4702)
#endif
/////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////
NAMESPACE_BEGIN(CryptoPP)
#if (CRYPTOPP_BOOL_PPC32 || CRYPTOPP_BOOL_PPC64)
// *************************** 32-bit *************************** //
#if (CRYPTOPP_BOOL_PPC32)
// Fills 4 bytes, buffer must be aligned
//
// Stores one 32-bit value produced by the Power "darn" instruction (or the
// compiler builtin) at output.
//
// output - destination for exactly one word32. The pointer is reinterpreted
//          as word32*, and word32 alignment is checked via CRYPTOPP_ASSERT.
//
// Exactly one of the preprocessor branches below is compiled:
//   1. GCC-style inline assembly (GCC_DARN_ASM_AVAILABLE)
//   2. the __builtin_darn_32() intrinsic (_ARCH_PWR9)
//   3. an assert-only stub for Apple targets
//   4. a deliberate compile error for everything else
inline void DARN32(void* output)
{
CRYPTOPP_ASSERT(IsAlignedOn(output, GetAlignmentOf<word32>()));
word32* ptr = reinterpret_cast<word32*>(output);
#if defined(GCC_DARN_ASM_AVAILABLE)
// This is "darn r3, 0". When L=0 a 32-bit conditioned word
// is returned. On failure 0xffffffffffffffff is returned.
// The Power manual recommends only checking the low 32-bit
// word for this case. See Power ISA 3.0 specification, p. 78.
//
// The instruction is emitted as raw bytes, with one byte order per
// endianness, rather than as a mnemonic.
// NOTE(review): presumably this is so assemblers that do not know the
// "darn" mnemonic can still build the file - confirm.
//
// The loop repeats the instruction until the stored word differs from the
// failure value, so 0xFFFFFFFF is never handed back to the caller.
do
{
__asm__ __volatile__ (
#if (CRYPTOPP_BIG_ENDIAN)
".byte 0x7c, 0x60, 0x05, 0xe6 \n\t" // r3 = darn 3, 0
"mr %0, 3 \n\t" // val = r3
#else
".byte 0xe6, 0x05, 0x60, 0x7c \n\t" // r3 = darn 3, 0
"mr %0, 3 \n\t" // val = r3
#endif
// r3 is hard-coded in the opcode bytes, so it is listed as clobbered
// and the result is copied into the compiler-chosen output register.
: "=r" (*ptr) : : "r3"
);
} while (*ptr == 0xFFFFFFFFu);
#elif defined(_ARCH_PWR9)
// This is probably going to break some platforms.
// We will deal with them as we encounter them.
// NOTE(review): unlike the inline assembly branch above, this branch
// stores the builtin's result without a retry loop on the failure
// value - confirm whether that is intended.
*ptr = __builtin_darn_32();
#elif defined(__APPLE__)
// Nop. Apple G4 and G5 machines are too old. They will
// avoid this code path because HasPower9() returns false.
CRYPTOPP_ASSERT(0);
#else
// Catch other compile breaks
// A negative array bound is ill-formed, so an unsupported toolchain
// fails at compile time instead of building a generator that does nothing.
int XXX[-1];
#endif
}
#endif // PPC32
// *************************** 64-bit *************************** //
#if (CRYPTOPP_BOOL_PPC64)
// Fills 8 bytes, buffer must be aligned
//
// Stores one 64-bit value produced by the Power "darn" instruction (or the
// compiler builtin) at output.
//
// output - destination for exactly one word64. The pointer is reinterpreted
//          as word64*, and word64 alignment is checked via CRYPTOPP_ASSERT.
//
// Exactly one of the preprocessor branches below is compiled:
//   1. GCC-style inline assembly (GCC_DARN_ASM_AVAILABLE)
//   2. the __builtin_darn() intrinsic (_ARCH_PWR9)
//   3. an assert-only stub for Apple targets
//   4. a deliberate compile error for everything else
inline void DARN64(void* output)
{
CRYPTOPP_ASSERT(IsAlignedOn(output, GetAlignmentOf<word64>()));
word64* ptr = reinterpret_cast<word64*>(output);
#if defined(GCC_DARN_ASM_AVAILABLE)
// This is "darn r3, 1". When L=1 a 64-bit conditioned word
// is returned. On failure 0xffffffffffffffff is returned.
// See Power ISA 3.0 specification, p. 78.
//
// The instruction is emitted as raw bytes, with one byte order per
// endianness, rather than as a mnemonic.
// NOTE(review): presumably this is so assemblers that do not know the
// "darn" mnemonic can still build the file - confirm.
//
// The loop repeats the instruction until the stored word differs from the
// failure value, so the all-ones pattern is never handed back to the caller.
do
{
__asm__ __volatile__ (
#if (CRYPTOPP_BIG_ENDIAN)
".byte 0x7c, 0x61, 0x05, 0xe6 \n\t" // r3 = darn 3, 1
"mr %0, 3 \n\t" // val = r3
#else
".byte 0xe6, 0x05, 0x61, 0x7c \n\t" // r3 = darn 3, 1
"mr %0, 3 \n\t" // val = r3
#endif
// r3 is hard-coded in the opcode bytes, so it is listed as clobbered
// and the result is copied into the compiler-chosen output register.
: "=r" (*ptr) : : "r3"
);
} while (*ptr == 0xFFFFFFFFFFFFFFFFull);
#elif defined(_ARCH_PWR9)
// This is probably going to break some platforms.
// We will deal with them as we encounter them.
// NOTE(review): unlike the inline assembly branch above, this branch
// stores the builtin's result without a retry loop on the failure
// value - confirm whether that is intended.
*ptr = __builtin_darn();
#elif defined(__APPLE__)
// Nop. Apple G4 and G5 machines are too old. They will
// avoid this code path because HasPower9() returns false.
CRYPTOPP_ASSERT(0);
#else
// Catch other compile breaks
// A negative array bound is ill-formed, so an unsupported toolchain
// fails at compile time instead of building a generator that does nothing.
int XXX[-1];
#endif
}
#endif // PPC64
// ************************ Standard C++ ************************ //
// Constructs the generator on PowerPC builds.
// Throws DARN_Err when HasDARN() reports that the instruction is not
// available. Otherwise sizes the 8-byte scratch block that GenerateBlock
// uses whenever it cannot write straight into the caller's buffer.
DARN::DARN()
{
    const bool available = HasDARN();
    if (available == false)
        throw DARN_Err("HasDARN");

    // Holds one generated word for partial (head or tail) copies.
    m_temp.New(8);
}
// Fills a caller-supplied buffer with output from the darn instruction.
//
// output - destination buffer; may have any alignment
// size   - number of bytes to produce; zero is a no-op
//
// The buffer is filled in three phases:
//   head - a partial word, staged in m_temp, that advances output to the
//          next word boundary
//   body - full words written directly into the now-aligned buffer
//   tail - a final partial word, staged in m_temp
//
// The head length is the distance to the next boundary, (W - offset) & (W-1),
// not the offset itself, and it is clamped to size. Using the raw offset left
// the pointer unaligned for the body phase and, when size was smaller than
// the offset, wrote past the end of the buffer and wrapped size around.
//
// Throws NotImplemented if neither a 64-bit nor a 32-bit PowerPC path was
// selected at compile time.
void DARN::GenerateBlock(byte *output, size_t size)
{
    CRYPTOPP_ASSERT((output && size) || !(output || size));
    if (size == 0) return;

#if (CRYPTOPP_BOOL_PPC64)

    // Bytes required to reach the next 8-byte boundary; 0 when aligned.
    size_t head = static_cast<size_t>(
        (8 - (reinterpret_cast<uintptr_t>(output) & 0x7)) & 0x7);

    // Never hand out more than the caller asked for.
    if (head > size)
        head = size;

    if (head != 0)
    {
        DARN64(m_temp);
        std::memcpy(output, m_temp, head);

        output += head;
        size -= head;
    }

    // Output is aligned
    const size_t words = size/8;
    for (size_t i = 0; i < words; i++)
        DARN64(output+i*8);

    output += words*8;
    size -= words*8;

    // Tail bytes that do not fill a whole word
    if (size)
    {
        DARN64(m_temp);
        std::memcpy(output, m_temp, size);
    }

#elif (CRYPTOPP_BOOL_PPC32)

    // Bytes required to reach the next 4-byte boundary; 0 when aligned.
    size_t head = static_cast<size_t>(
        (4 - (reinterpret_cast<uintptr_t>(output) & 0x3)) & 0x3);

    // Never hand out more than the caller asked for.
    if (head > size)
        head = size;

    if (head != 0)
    {
        DARN32(m_temp);
        std::memcpy(output, m_temp, head);

        output += head;
        size -= head;
    }

    // Output is aligned
    const size_t words = size/4;
    for (size_t i = 0; i < words; i++)
        DARN32(output+i*4);

    output += words*4;
    size -= words*4;

    // Tail bytes that do not fill a whole word
    if (size)
    {
        DARN32(m_temp);
        std::memcpy(output, m_temp, size);
    }

#else
    // No suitable compiler found
    CRYPTOPP_UNUSED(output);
    throw NotImplemented("DARN: failed to find a suitable implementation");
#endif
}
void DARN::DiscardBytes(size_t n)
{
// RoundUpToMultipleOf is used because a full word is read, and its cheaper
// to discard full words. There's no sense in dealing with tail bytes.
FixedSizeSecBlock<word64, 16> discard;
n = RoundUpToMultipleOf(n, sizeof(word64));
size_t count = STDMIN(n, discard.SizeInBytes());
while (count)
{
GenerateBlock(discard.BytePtr(), count);
n -= count;
count = STDMIN(n, discard.SizeInBytes());
}
}
#else // not PPC32 or PPC64
// Constructor for builds that target neither 32-bit nor 64-bit PowerPC.
// There is no darn instruction to call, so construction always fails
// with DARN_Err.
DARN::DARN()
{
    throw DARN_Err("HasDARN");
}
// Stub for builds that target neither 32-bit nor 64-bit PowerPC.
// The constructor in this branch always throws, so no object should
// exist to call this. The parameters are only marked as unused and the
// buffer is left untouched.
void DARN::GenerateBlock(byte *output, size_t size)
{
    CRYPTOPP_UNUSED(output);
    CRYPTOPP_UNUSED(size);
}
// Stub for builds that target neither 32-bit nor 64-bit PowerPC.
// The constructor in this branch always throws, so no object should
// exist to call this. The byte count is only marked as unused.
void DARN::DiscardBytes(size_t n)
{
    CRYPTOPP_UNUSED(n);
}
#endif // PPC32 or PPC64
NAMESPACE_END