Split Blake2 SIMD files into blake2s-simd.cpp and blake2b-simd.cpp (GH #729, GH #731)

The split was required because Blake2b needs Power8, while Blake2s only requires Power7.
This commit is contained in:
Jeffrey Walton 2018-11-02 19:09:36 -04:00
parent d2b64a4d63
commit 5dca85b819
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
7 changed files with 1117 additions and 1020 deletions

View File

@ -35,7 +35,8 @@ bench2.cpp
bench3.cpp
bfinit.cpp
blake2.cpp
blake2-simd.cpp
blake2s-simd.cpp
blake2b-simd.cpp
blake2.h
blowfish.cpp
blowfish.h

View File

@ -257,7 +257,8 @@ ifeq ($(findstring -DCRYPTOPP_DISABLE_SSSE3,$(CXXFLAGS)),)
ifeq ($(findstring -DCRYPTOPP_DISABLE_SSE4,$(CXXFLAGS)),)
HAVE_SSE4 = $(shell $(CXX) $(CXXFLAGS) -msse4.1 -dM -E pch.cpp 2>&1 | $(GREP) -i -c __SSE4_1__)
ifeq ($(HAVE_SSE4),1)
BLAKE2_FLAG = -msse4.1
BLAKE2B_FLAG = -msse4.1
BLAKE2S_FLAG = -msse4.1
SIMON64_FLAG = -msse4.1
SPECK64_FLAG = -msse4.1
endif
@ -309,7 +310,8 @@ ifeq ($(SUN_COMPILER),1)
endif
COUNT := $(shell $(CXX) $(CXXFLAGS) -E -xarch=sse4_1 -xdumpmacros /dev/null 2>&1 | $(GREP) -i -c "illegal")
ifeq ($(COUNT),0)
BLAKE2_FLAG = -xarch=sse4_1 -D__SSE4_1__=1
BLAKE2B_FLAG = -xarch=sse4_1 -D__SSE4_1__=1
BLAKE2S_FLAG = -xarch=sse4_1 -D__SSE4_1__=1
SIMON64_FLAG = -xarch=sse4_1 -D__SSE4_1__=1
SPECK64_FLAG = -xarch=sse4_1 -D__SSE4_1__=1
LDFLAGS += -xarch=sse4_1
@ -377,7 +379,8 @@ ifeq ($(IS_NEON),1)
AES_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
CRC_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
GCM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
BLAKE2_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
BLAKE2B_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
BLAKE2S_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
CHACHA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
CHAM_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
LEA_FLAG = -march=armv7-a -mfloat-abi=$(FP_ABI) -mfpu=neon
@ -395,7 +398,8 @@ ifeq ($(IS_ARMV8),1)
HAVE_NEON = $(shell $(CXX) $(CXXFLAGS) -march=armv8-a -dM -E pch.cpp 2>&1 | $(GREP) -i -c __ARM_NEON)
ifeq ($(HAVE_NEON),1)
ARIA_FLAG = -march=armv8-a
BLAKE2_FLAG = -march=armv8-a
BLAKE2B_FLAG = -march=armv8-a
BLAKE2S_FLAG = -march=armv8-a
CHACHA_FLAG = -march=armv8-a
CHAM_FLAG = -march=armv8-a
LEA_FLAG = -march=armv8-a
@ -432,7 +436,8 @@ ifneq ($(IS_PPC32)$(IS_PPC64),00)
ifneq ($(HAVE_POWER8),0)
POWER8_FLAG = -mcpu=power8 -maltivec
AES_FLAG = $(POWER8_FLAG)
BLAKE2_FLAG = $(POWER8_FLAG)
BLAKE2B_FLAG = $(POWER8_FLAG)
BLAKE2S_FLAG = $(POWER8_FLAG)
CHACHA_FLAG = $(POWER8_FLAG)
GCM_FLAG = $(POWER8_FLAG)
SHA_FLAG = $(POWER8_FLAG)
@ -448,6 +453,7 @@ ifneq ($(IS_PPC32)$(IS_PPC64),00)
ifneq ($(HAVE_POWER7),0)
POWER7_FLAG = -mcpu=power7 -maltivec
ARIA_FLAG = $(POWER7_FLAG)
BLAKE2S_FLAG = $(POWER7_FLAG)
CHAM_FLAG = $(POWER7_FLAG)
LEA_FLAG = $(POWER7_FLAG)
SIMECK_FLAG = $(POWER7_FLAG)
@ -466,7 +472,8 @@ ifneq ($(IS_PPC32)$(IS_PPC64),00)
ifneq ($(HAVE_POWER8),0)
POWER8_FLAG = -qarch=pwr8 -qaltivec
AES_FLAG = $(POWER8_FLAG)
BLAKE2_FLAG = $(POWER8_FLAG)
BLAKE2B_FLAG = $(POWER8_FLAG)
BLAKE2S_FLAG = $(POWER8_FLAG)
CHACHA_FLAG = $(POWER8_FLAG)
GCM_FLAG = $(POWER8_FLAG)
SHA_FLAG = $(POWER8_FLAG)
@ -482,6 +489,7 @@ ifneq ($(IS_PPC32)$(IS_PPC64),00)
ifneq ($(HAVE_POWER7),0)
POWER7_FLAG = -qarch=pwr7 -qaltivec
ARIA_FLAG = $(POWER7_FLAG)
BLAKE2S_FLAG = $(POWER7_FLAG)
CHAM_FLAG = $(POWER7_FLAG)
LEA_FLAG = $(POWER7_FLAG)
SIMECK_FLAG = $(POWER7_FLAG)
@ -502,7 +510,8 @@ ifneq ($(IS_PPC32)$(IS_PPC64),00)
ifneq ($(HAVE_LLVM),0)
POWER7_FLAG = $(POWER8_FLAG)
ARIA_FLAG = $(POWER8_FLAG)
BLAKE2_FLAG = $(POWER8_FLAG)
BLAKE2B_FLAG = $(POWER8_FLAG)
BLAKE2S_FLAG = $(POWER8_FLAG)
CHACHA_FLAG = $(POWER8_FLAG)
CHAM_FLAG = $(POWER8_FLAG)
LEA_FLAG = $(POWER8_FLAG)
@ -1176,9 +1185,13 @@ aes-armv4.o : aes-armv4.S
aria-simd.o : aria-simd.cpp
$(CXX) $(strip $(CXXFLAGS) $(ARIA_FLAG) -c) $<
# SSE4.1 or ARMv8a available
blake2-simd.o : blake2-simd.cpp
$(CXX) $(strip $(CXXFLAGS) $(BLAKE2_FLAG) -c) $<
# SSE4.1, NEON or POWER7 available
# Compiles blake2s-simd.cpp with the per-file BLAKE2S_FLAG appended to CXXFLAGS.
# $(strip ...) removes the extra whitespace left behind when BLAKE2S_FLAG is
# empty, and $< expands to the first prerequisite (the .cpp file).
blake2s-simd.o : blake2s-simd.cpp
$(CXX) $(strip $(CXXFLAGS) $(BLAKE2S_FLAG) -c) $<
# SSE4.1, NEON or POWER8 available
# Compiles blake2b-simd.cpp with the per-file BLAKE2B_FLAG appended to CXXFLAGS.
# $(strip ...) removes the extra whitespace left behind when BLAKE2B_FLAG is
# empty, and $< expands to the first prerequisite (the .cpp file).
blake2b-simd.o : blake2b-simd.cpp
$(CXX) $(strip $(CXXFLAGS) $(BLAKE2B_FLAG) -c) $<
# SSE2 or NEON available
chacha-simd.o : chacha-simd.cpp

File diff suppressed because it is too large Load Diff

1078
blake2s-simd.cpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -53,8 +53,8 @@
LIB_SRCS = \
cryptlib.cpp cpu.cpp integer.cpp 3way.cpp adler32.cpp algebra.cpp \
algparam.cpp arc4.cpp aria-simd.cpp aria.cpp ariatab.cpp asn.cpp \
authenc.cpp base32.cpp base64.cpp basecode.cpp bfinit.cpp blake2-simd.cpp \
blake2.cpp blowfish.cpp blumshub.cpp camellia.cpp cast.cpp casts.cpp \
authenc.cpp base32.cpp base64.cpp basecode.cpp bfinit.cpp blake2s-simd.cpp \
blake2b-simd.cpp blake2.cpp blowfish.cpp blumshub.cpp camellia.cpp cast.cpp casts.cpp \
cbcmac.cpp ccm.cpp chacha-simd.cpp chacha.cpp cham-simd.cpp cham.cpp channels.cpp \
cmac.cpp crc-simd.cpp crc.cpp default.cpp des.cpp dessp.cpp dh.cpp \
dh2.cpp dll.cpp dsa.cpp eax.cpp ec2n.cpp eccrypto.cpp ecp.cpp elgamal.cpp \
@ -81,8 +81,8 @@ LIB_SRCS = \
LIB_OBJS = \
cryptlib.obj cpu.obj integer.obj 3way.obj adler32.obj algebra.obj \
algparam.obj arc4.obj aria-simd.obj aria.obj ariatab.obj asn.obj \
authenc.obj base32.obj base64.obj basecode.obj bfinit.obj blake2-simd.obj \
blake2.obj blowfish.obj blumshub.obj camellia.obj cast.obj casts.obj \
authenc.obj base32.obj base64.obj basecode.obj bfinit.obj blake2s-simd.obj \
blake2b-simd.obj blake2.obj blowfish.obj blumshub.obj camellia.obj cast.obj casts.obj \
cbcmac.obj ccm.obj chacha-simd.obj chacha.obj cham-simd.obj cham.obj channels.obj \
cmac.obj crc-simd.obj crc.obj default.obj des.obj dessp.obj dh.obj \
dh2.obj dll.obj dsa.obj eax.obj ec2n.obj eccrypto.obj ecp.obj elgamal.obj \

View File

@ -182,7 +182,8 @@
<ClCompile Include="basecode.cpp" />
<ClCompile Include="bfinit.cpp" />
<ClCompile Include="blake2.cpp" />
<ClCompile Include="blake2-simd.cpp" />
<ClCompile Include="blake2s-simd.cpp" />
<ClCompile Include="blake2b-simd.cpp" />
<ClCompile Include="blowfish.cpp" />
<ClCompile Include="blumshub.cpp" />
<ClCompile Include="camellia.cpp" />

View File

@ -59,7 +59,10 @@
<ClCompile Include="blake2.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="blake2-simd.cpp">
<ClCompile Include="blake2s-simd.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="blake2b-simd.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="blowfish.cpp">