Add SIMON and SPECK flags to GNUmakefile-cross

This commit is contained in:
Jeffrey Walton 2018-08-16 18:24:22 -04:00
parent a4c5bdf821
commit 2876371cea
No known key found for this signature in database
GPG Key ID: B36AB348921B1838
2 changed files with 34 additions and 16 deletions

View File

@ -264,8 +264,10 @@ ifeq ($(IS_NEON),1)
BLAKE2_FLAG += -mfpu=neon
LEA_FLAG += -mfpu=neon
SIMECK_FLAG += -mfpu=neon
SIMON_FLAG += -mfpu=neon
SPECK_FLAG += -mfpu=neon
SIMON64_FLAG += -mfpu=neon
SIMON128_FLAG += -mfpu=neon
SPECK64_FLAG += -mfpu=neon
SPECK128_FLAG += -mfpu=neon
ifeq ($(IS_ANDROID),1)
ifeq ($(findstring -mfloat-abi=softfp,$(CXXFLAGS)),)
NEON_FLAG += -mfloat-abi=softfp
@ -274,8 +276,10 @@ ifeq ($(IS_NEON),1)
BLAKE2_FLAG += -mfloat-abi=softfp
LEA_FLAG += -mfloat-abi=softfp
SIMECK_FLAG += -mfloat-abi=softfp
SIMON_FLAG += -mfloat-abi=softfp
SPECK_FLAG += -mfloat-abi=softfp
SIMON64_FLAG += -mfloat-abi=softfp
SIMON128_FLAG += -mfloat-abi=softfp
SPECK64_FLAG += -mfloat-abi=softfp
SPECK128_FLAG += -mfloat-abi=softfp
endif
endif
endif
@ -290,8 +294,10 @@ ifneq ($(IS_ARMv8),0)
LEA_FLAG = -march=armv8-a
NEON_FLAG = -march=armv8-a
SIMECK_FLAG = -march=armv8-a
SIMON_FLAG = -march=armv8-a
SPECK_FLAG = -march=armv8-a
SIMON64_FLAG = -march=armv8-a
SIMON128_FLAG = -march=armv8-a
SPECK64_FLAG = -march=armv8-a
SPECK128_FLAG = -march=armv8-a
endif
HAVE_CRC := $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -march=armv8-a+crc -dM -E adhoc.cpp 2>&1 | $(EGREP) -i -c __ARM_FEATURE_CRC32)
ifeq ($(HAVE_CRC),1)
@ -318,13 +324,13 @@ ifneq ($(IS_i686)$(IS_x86_64),00)
LEA_FLAG = -mssse3
SSSE3_FLAG = -mssse3
SIMECK_FLAG = -mssse3
SIMON_FLAG = -mssse3
SPECK_FLAG = -mssse3
SIMON128_FLAG = -mssse3
SPECK128_FLAG = -mssse3
endif
HAVE_SSE4 = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -msse4.1 -dM -E adhoc.cpp 2>&1 | $(EGREP) -i -c __SSE4_1__)
ifeq ($(HAVE_SSE4),1)
SIMON_FLAG = -msse4.1
SPECK_FLAG = -msse4.1
SIMON64_FLAG = -msse4.1
SPECK64_FLAG = -msse4.1
endif
HAVE_SSE4 = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -msse4.2 -dM -E adhoc.cpp 2>&1 | $(EGREP) -i -c __SSE4_2__)
ifeq ($(HAVE_SSE4),1)
@ -604,13 +610,21 @@ shacal2-simd.o : shacal2-simd.cpp
simeck-simd.o : simeck-simd.cpp
$(CXX) $(strip $(CXXFLAGS) $(SIMECK_FLAG) -c) $<
# SSSE3 or NEON available
simon-simd.o : simon-simd.cpp
$(CXX) $(strip $(CXXFLAGS) $(SIMON_FLAG) -c) $<
# SSE4.1, NEON or POWER7 available
simon64-simd.o : simon64-simd.cpp
$(CXX) $(strip $(CXXFLAGS) $(SIMON64_FLAG) -c) $<
# SSSE3 or NEON available
speck-simd.o : speck-simd.cpp
$(CXX) $(strip $(CXXFLAGS) $(SPECK_FLAG) -c) $<
# SSSE3, NEON or POWER8 available
simon128-simd.o : simon128-simd.cpp
$(CXX) $(strip $(CXXFLAGS) $(SIMON128_FLAG) -c) $<
# SSE4.1, NEON or POWER7 available
speck64-simd.o : speck64-simd.cpp
$(CXX) $(strip $(CXXFLAGS) $(SPECK64_FLAG) -c) $<
# SSSE3, NEON or POWER8 available
speck128-simd.o : speck128-simd.cpp
$(CXX) $(strip $(CXXFLAGS) $(SPECK128_FLAG) -c) $<
# AESNI available
sm4-simd.o : sm4-simd.cpp

View File

@ -342,6 +342,7 @@ void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2_State<word32, false>& stat
row2 = ff1 = LOADU( &state.h[4] );
row3 = LOADU( &BLAKE2S_IV[0] );
row4 = _mm_xor_si128( LOADU( &BLAKE2S_IV[4] ), LOADU( &state.t[0] ) );
BLAKE2S_ROUND( 0 );
BLAKE2S_ROUND( 1 );
BLAKE2S_ROUND( 2 );
@ -352,6 +353,7 @@ void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2_State<word32, false>& stat
BLAKE2S_ROUND( 7 );
BLAKE2S_ROUND( 8 );
BLAKE2S_ROUND( 9 );
STOREU( &state.h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) );
STOREU( &state.h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) );
}
@ -752,6 +754,7 @@ void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2_State<word64, true>& state
row3h = LOADU( &BLAKE2B_IV[2] );
row4l = _mm_xor_si128( LOADU( &BLAKE2B_IV[4] ), LOADU( &state.t[0] ) );
row4h = _mm_xor_si128( LOADU( &BLAKE2B_IV[6] ), LOADU( &state.f[0] ) );
BLAKE2B_ROUND( 0 );
BLAKE2B_ROUND( 1 );
BLAKE2B_ROUND( 2 );
@ -764,6 +767,7 @@ void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2_State<word64, true>& state
BLAKE2B_ROUND( 9 );
BLAKE2B_ROUND( 10 );
BLAKE2B_ROUND( 11 );
row1l = _mm_xor_si128( row3l, row1l );
row1h = _mm_xor_si128( row3h, row1h );
STOREU( &state.h[0], _mm_xor_si128( LOADU( &state.h[0] ), row1l ) );