Add option to disable runtime CPU detection

Signed-off-by: Vladislav Shchapov <vladislav@shchapov.ru>
This commit is contained in:
Vladislav Shchapov 2024-02-28 10:16:12 +05:00 committed by Hans Kristian Rosbach
parent fe0a6407da
commit c694bcdaf6
12 changed files with 276 additions and 18 deletions

View File

@ -86,6 +86,7 @@ option(WITH_REDUCED_MEM "Reduced memory usage for special cases (reduces perform
option(WITH_NEW_STRATEGIES "Use new strategies" ON)
option(WITH_NATIVE_INSTRUCTIONS
"Instruct the compiler to use the full instruction set on this host (gcc/clang -march=native)" OFF)
option(WITH_RUNTIME_CPU_DETECTION "Build with runtime detection of CPU architecture" ON)
option(WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings" OFF)
option(WITH_CODE_COVERAGE "Enable code coverage reporting" OFF)
option(WITH_INFLATE_STRICT "Build with strict inflate distance checking" OFF)
@ -287,12 +288,21 @@ if(WITH_NATIVE_INSTRUCTIONS)
separate_arguments(NATIVEOPTIONS UNIX_COMMAND "${NATIVEFLAG}")
endif()
add_compile_options(${NATIVEOPTIONS})
set(WITH_RUNTIME_CPU_DETECTION OFF)
else()
message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not implemented yet on this configuration")
set(WITH_NATIVE_INSTRUCTIONS OFF)
endif()
endif()
# Compile without functable or CPU detection
if(NOT WITH_RUNTIME_CPU_DETECTION)
if(MSVC AND BASEARCH_X86_FOUND)
message(STATUS "WARNING: Microsoft Visual Studio does not support compile time detection of CPU features for \"/arch\" before \"AVX\"")
endif()
add_definitions(-DDISABLE_RUNTIME_CPU_DETECTION)
endif()
# Force disable LTO if WITH_NATIVE_INSTRUCTIONS is not active
if(NOT WITH_NATIVE_INSTRUCTIONS)
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF)
@ -1302,6 +1312,7 @@ add_feature_info(WITH_OPTIM WITH_OPTIM "Build with optimisation")
add_feature_info(WITH_NEW_STRATEGIES WITH_NEW_STRATEGIES "Use new strategies")
add_feature_info(WITH_NATIVE_INSTRUCTIONS WITH_NATIVE_INSTRUCTIONS
"Instruct the compiler to use the full instruction set on this host (gcc/clang -march=native)")
add_feature_info(WITH_RUNTIME_CPU_DETECTION WITH_RUNTIME_CPU_DETECTION "Build with runtime CPU detection")
add_feature_info(WITH_MAINTAINER_WARNINGS WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings")
add_feature_info(WITH_CODE_COVERAGE WITH_CODE_COVERAGE "Enable code coverage reporting")
add_feature_info(WITH_INFLATE_STRICT WITH_INFLATE_STRICT "Build with strict inflate distance checking")

View File

@ -94,20 +94,21 @@ make test
Build Options
-------------
| CMake | configure | Description | Default |
|:-------------------------|:-------------------------|:--------------------------------------------------------------------------------------|---------|
| ZLIB_COMPAT | --zlib-compat | Compile with zlib compatible API | OFF |
| ZLIB_ENABLE_TESTS | | Build test binaries | ON |
| WITH_GZFILEOP | --without-gzfileops | Compile with support for gzFile related functions | ON |
| WITH_OPTIM | --without-optimizations | Build with optimisations | ON |
| WITH_NEW_STRATEGIES | --without-new-strategies | Use new strategies | ON |
| WITH_NATIVE_INSTRUCTIONS | | Compiles with full instruction set supported on this host (gcc/clang -march=native) | OFF |
| WITH_SANITIZER | | Build with sanitizer (memory, address, undefined) | OFF |
| WITH_GTEST | | Build gtest_zlib | ON |
| WITH_FUZZERS | | Build test/fuzz | OFF |
| WITH_BENCHMARKS | | Build test/benchmarks | OFF |
| WITH_MAINTAINER_WARNINGS | | Build with project maintainer warnings | OFF |
| WITH_CODE_COVERAGE | | Enable code coverage reporting | OFF |
| CMake | configure | Description | Default |
|:---------------------------|:-------------------------|:------------------------------------------------------------------------------------|---------|
| ZLIB_COMPAT | --zlib-compat | Compile with zlib compatible API | OFF |
| ZLIB_ENABLE_TESTS | | Build test binaries | ON |
| WITH_GZFILEOP | --without-gzfileops | Compile with support for gzFile related functions | ON |
| WITH_OPTIM | --without-optimizations | Build with optimisations | ON |
| WITH_NEW_STRATEGIES | --without-new-strategies | Use new strategies | ON |
| WITH_NATIVE_INSTRUCTIONS | | Compiles with full instruction set supported on this host (gcc/clang -march=native) | OFF |
| WITH_RUNTIME_CPU_DETECTION | | Compiles with runtime CPU detection | ON |
| WITH_SANITIZER | | Build with sanitizer (memory, address, undefined) | OFF |
| WITH_GTEST | | Build gtest_zlib | ON |
| WITH_FUZZERS | | Build test/fuzz | OFF |
| WITH_BENCHMARKS | | Build test/benchmarks | OFF |
| WITH_MAINTAINER_WARNINGS | | Build with project maintainer warnings | OFF |
| WITH_CODE_COVERAGE | | Enable code coverage reporting | OFF |
Install

View File

@ -5,7 +5,6 @@
#ifndef ARM_FUNCTIONS_H_
#define ARM_FUNCTIONS_H_
#ifdef ARM_NEON
uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
uint32_t chunksize_neon(void);
@ -28,4 +27,39 @@ uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len);
void slide_hash_armv6(deflate_state *s);
#endif
#ifdef DISABLE_RUNTIME_CPU_DETECTION
// ARM - SIMD
# if (defined(ARM_SIMD) && defined(__ARM_FEATURE_SIMD32)) || defined(ARM_NOCHECK_SIMD)
# undef native_slide_hash
# define native_slide_hash slide_hash_armv6
# endif
// ARM - NEON
# if (defined(ARM_NEON) && (defined(__ARM_NEON__) || defined(__ARM_NEON))) || ARM_NOCHECK_NEON
# undef native_adler32
# define native_adler32 adler32_neon
# undef native_chunkmemset_safe
# define native_chunkmemset_safe chunkmemset_safe_neon
# undef native_chunksize
# define native_chunksize chunksize_neon
# undef native_inflate_fast
# define native_inflate_fast inflate_fast_neon
# undef native_slide_hash
# define native_slide_hash slide_hash_neon
# ifdef HAVE_BUILTIN_CTZLL
# undef native_compare256
# define native_compare256 compare256_neon
# undef native_longest_match
# define native_longest_match longest_match_neon
# undef native_longest_match_slow
# define native_longest_match_slow longest_match_slow_neon
# endif
# endif
// ARM - ACLE
# if defined(ARM_ACLE) && defined(__ARM_ACLE) && defined(__ARM_FEATURE_CRC32)
# undef native_crc32
# define native_crc32 crc32_acle
# endif
#endif
#endif /* ARM_FUNCTIONS_H_ */

View File

@ -84,4 +84,23 @@ uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
# define compare256_generic compare256_c
#endif
#ifdef DISABLE_RUNTIME_CPU_DETECTION
// Generic code
# define native_adler32 adler32_c
# define native_adler32_fold_copy adler32_fold_copy_c
# define native_chunkmemset_safe chunkmemset_safe_c
# define native_chunksize chunksize_c
# define native_crc32 PREFIX(crc32_braid)
# define native_crc32_fold crc32_fold_c
# define native_crc32_fold_copy crc32_fold_copy_c
# define native_crc32_fold_final crc32_fold_final_c
# define native_crc32_fold_reset crc32_fold_reset_c
# define native_inflate_fast inflate_fast_c
# define native_slide_hash slide_hash_c
# define native_longest_match longest_match_generic
# define native_longest_match_slow longest_match_slow_generic
# define native_compare256 compare256_generic
#endif
#endif

View File

@ -27,4 +27,41 @@ uint32_t longest_match_power9(deflate_state *const s, Pos cur_match);
uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match);
#endif
#ifdef DISABLE_RUNTIME_CPU_DETECTION
// Power - VMX
# if defined(PPC_VMX) && defined(__ALTIVEC__)
# undef native_adler32
# define native_adler32 adler32_vmx
# undef native_slide_hash
# define native_slide_hash slide_hash_vmx
# endif
// Power8 - VSX
# if defined(POWER8_VSX) && defined(_ARCH_PWR8) && defined(__VSX__)
# undef native_adler32
# define native_adler32 adler32_power8
# undef native_chunkmemset_safe
# define native_chunkmemset_safe chunkmemset_safe_power8
# undef native_chunksize
# define native_chunksize chunksize_power8
# undef native_inflate_fast
# define native_inflate_fast inflate_fast_power8
# undef native_slide_hash
# define native_slide_hash slide_hash_power8
# endif
# if defined(POWER8_VSX_CRC32) && defined(_ARCH_PWR8) && defined(__VSX__)
# undef native_crc32
# define native_crc32 crc32_power8
# endif
// Power9
# if defined(POWER9) && defined(_ARCH_PWR9)
# undef native_compare256
# define native_compare256 compare256_power9
# undef native_longest_match
# define native_longest_match longest_match_power9
# undef native_longest_match_slow
# define native_longest_match_slow longest_match_slow_power9
# endif
#endif
#endif /* POWER_FUNCTIONS_H_ */

View File

@ -22,4 +22,28 @@ void slide_hash_rvv(deflate_state *s);
void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start);
#endif
#ifdef DISABLE_RUNTIME_CPU_DETECTION
// RISCV - RVV
# if defined(RISCV_RVV) && defined(__riscv_v) && defined(__linux__)
# undef native_adler32
# define native_adler32 adler32_rvv
# undef native_adler32_fold_copy
# define native_adler32_fold_copy adler32_fold_copy_rvv
# undef native_chunkmemset_safe
# define native_chunkmemset_safe chunkmemset_safe_rvv
# undef native_chunksize
# define native_chunksize chunksize_rvv
# undef native_compare256
# define native_compare256 compare256_rvv
# undef native_inflate_fast
# define native_inflate_fast inflate_fast_rvv
# undef native_longest_match
# define native_longest_match longest_match_rvv
# undef native_longest_match_slow
# define native_longest_match_slow longest_match_slow_rvv
# undef native_slide_hash
# define native_slide_hash slide_hash_rvv
# endif
#endif
#endif /* RISCV_FUNCTIONS_H_ */

View File

@ -9,4 +9,12 @@
uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len);
#endif
#ifdef DISABLE_RUNTIME_CPU_DETECTION
# if defined(S390_CRC32_VX) && defined(__zarch__) && __ARCH__ >= 11 && defined(__VX__)
# undef native_crc32
# define native_crc32 = crc32_s390_vx
# endif
#endif
#endif

View File

@ -67,4 +67,106 @@ uint32_t crc32_fold_vpclmulqdq_final(crc32_fold *crc);
uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
#endif
#ifdef DISABLE_RUNTIME_CPU_DETECTION
// X86 - SSE2
# if (defined(X86_SSE2) && defined(__SSE2__)) || defined(__x86_64__) || defined(_M_X64) || defined(X86_NOCHECK_SSE2)
# undef native_chunkmemset_safe
# define native_chunkmemset_safe chunkmemset_safe_sse2
# undef native_chunksize
# define native_chunksize chunksize_sse2
# undef native_inflate_fast
# define native_inflate_fast inflate_fast_sse2
# undef native_slide_hash
# define native_slide_hash slide_hash_sse2
# ifdef HAVE_BUILTIN_CTZ
# undef native_compare256
# define native_compare256 compare256_sse2
# undef native_longest_match
# define native_longest_match longest_match_sse2
# undef native_longest_match_slow
# define native_longest_match_slow longest_match_slow_sse2
# endif
#endif
// X86 - SSSE3
# if defined(X86_SSSE3) && defined(__SSSE3__)
# undef native_adler32
# define native_adler32 adler32_ssse3
# undef native_chunkmemset_safe
# define native_chunkmemset_safe chunkmemset_safe_ssse3
# undef native_inflate_fast
# define native_inflate_fast inflate_fast_ssse3
# endif
// X86 - SSE4.2
# if defined(X86_SSE42) && defined(__SSE4_2__)
# undef native_adler32_fold_copy
# define native_adler32_fold_copy adler32_fold_copy_sse42
# endif
// X86 - PCLMUL
#if defined(X86_PCLMULQDQ_CRC) && defined(__PCLMUL__)
# undef native_crc32
# define native_crc32 crc32_pclmulqdq
# undef native_crc32_fold
# define native_crc32_fold crc32_fold_pclmulqdq
# undef native_crc32_fold_copy
# define native_crc32_fold_copy crc32_fold_pclmulqdq_copy
# undef native_crc32_fold_final
# define native_crc32_fold_final crc32_fold_pclmulqdq_final
# undef native_crc32_fold_reset
# define native_crc32_fold_reset crc32_fold_pclmulqdq_reset
#endif
// X86 - AVX
# if defined(X86_AVX2) && defined(__AVX2__)
# undef native_adler32
# define native_adler32 adler32_avx2
# undef native_adler32_fold_copy
# define native_adler32_fold_copy adler32_fold_copy_avx2
# undef native_chunkmemset_safe
# define native_chunkmemset_safe chunkmemset_safe_avx2
# undef native_chunksize
# define native_chunksize chunksize_avx2
# undef native_inflate_fast
# define native_inflate_fast inflate_fast_avx2
# undef native_slide_hash
# define native_slide_hash slide_hash_avx2
# ifdef HAVE_BUILTIN_CTZ
# undef native_compare256
# define native_compare256 compare256_avx2
# undef native_longest_match
# define native_longest_match longest_match_avx2
# undef native_longest_match_slow
# define native_longest_match_slow longest_match_slow_avx2
# endif
# endif
// X86 - AVX512 (F,DQ,BW,Vl)
# if defined(X86_AVX512) && defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VL__)
# undef native_adler32
# define native_adler32 adler32_avx512
# undef native_adler32_fold_copy
# define native_adler32_fold_copy adler32_fold_copy_avx512
// X86 - AVX512 (VNNI)
# if defined(X86_AVX512VNNI) && defined(__AVX512VNNI__)
# undef native_adler32
# define native_adler32 adler32_avx512_vnni
# undef native_adler32_fold_copy
# define native_adler32_fold_copy adler32_fold_copy_avx512_vnni
# endif
// X86 - VPCLMULQDQ
# if defined(__PCLMUL__) && defined(__AVX512F__) && defined(__VPCLMULQDQ__)
# undef native_crc32
# define native_crc32 crc32_vpclmulqdq
# undef native_crc32_fold
# define native_crc32_fold crc32_fold_vpclmulqdq
# undef native_crc32_fold_copy
# define native_crc32_fold_copy crc32_fold_vpclmulqdq_copy
# undef native_crc32_fold_final
# define native_crc32_fold_final crc32_fold_vpclmulqdq_final
# undef native_crc32_fold_reset
# define native_crc32_fold_reset crc32_fold_vpclmulqdq_reset
# endif
# endif
#endif
#endif /* X86_FUNCTIONS_H_ */

View File

@ -190,8 +190,10 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level
deflate_state *s;
int wrap = 1;
#ifndef DISABLE_RUNTIME_CPU_DETECTION
/* Force initialization functable, because deflate captures function pointers from functable. */
functable.force_init();
#endif
if (strm == NULL)
return Z_STREAM_ERROR;

View File

@ -2,6 +2,7 @@
* Copyright (C) 2017 Hans Kristian Rosbach
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifndef DISABLE_RUNTIME_CPU_DETECTION
#include "zbuild.h"
#include "functable.h"
@ -122,6 +123,7 @@ static void init_functable(void) {
# endif
}
#endif
// X86 - AVX512 (F,DQ,BW,Vl)
#ifdef X86_AVX512
if (cf.x86.has_avx512) {
ft.adler32 = &adler32_avx512;
@ -348,3 +350,5 @@ Z_INTERNAL struct functable_s functable = {
longest_match_slow_stub,
slide_hash_stub,
};
#endif

View File

@ -9,6 +9,19 @@
#include "deflate.h"
#include "crc32.h"
#ifdef DISABLE_RUNTIME_CPU_DETECTION
# include "arch_functions.h"
/* When compiling with native instructions it is not necessary to use functable.
* Instead we use native_ macro indicating the best available variant of arch-specific
* functions for the current platform.
*/
# define FUNCTABLE_CALL(name) native_ ## name
# define FUNCTABLE_FPTR(name) &native_ ## name
#else
struct functable_s {
void (* force_init) (void);
uint32_t (* adler32) (uint32_t adler, const uint8_t *buf, size_t len);
@ -32,8 +45,9 @@ Z_INTERNAL extern struct functable_s functable;
/* Explicitly indicate functions are conditionally dispatched.
*/
#define FUNCTABLE_CALL(name) functable.name
#define FUNCTABLE_FPTR(name) functable.name
# define FUNCTABLE_CALL(name) functable.name
# define FUNCTABLE_FPTR(name) functable.name
#endif
#endif

View File

@ -139,8 +139,10 @@ int32_t ZNG_CONDEXPORT PREFIX(inflateInit2)(PREFIX3(stream) *strm, int32_t windo
int32_t ret;
struct inflate_state *state;
#ifndef DISABLE_RUNTIME_CPU_DETECTION
/* Initialize functable earlier. */
functable.force_init();
#endif
if (strm == NULL)
return Z_STREAM_ERROR;