mirror of
https://github.com/shadps4-emu/ext-zlib-ng.git
synced 2024-10-07 00:13:58 +00:00
Add option to disable runtime CPU detection
Signed-off-by: Vladislav Shchapov <vladislav@shchapov.ru>
This commit is contained in:
parent
fe0a6407da
commit
c694bcdaf6
@ -86,6 +86,7 @@ option(WITH_REDUCED_MEM "Reduced memory usage for special cases (reduces perform
|
||||
option(WITH_NEW_STRATEGIES "Use new strategies" ON)
|
||||
option(WITH_NATIVE_INSTRUCTIONS
|
||||
"Instruct the compiler to use the full instruction set on this host (gcc/clang -march=native)" OFF)
|
||||
option(WITH_RUNTIME_CPU_DETECTION "Build with runtime detection of CPU architecture" ON)
|
||||
option(WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings" OFF)
|
||||
option(WITH_CODE_COVERAGE "Enable code coverage reporting" OFF)
|
||||
option(WITH_INFLATE_STRICT "Build with strict inflate distance checking" OFF)
|
||||
@ -287,12 +288,21 @@ if(WITH_NATIVE_INSTRUCTIONS)
|
||||
separate_arguments(NATIVEOPTIONS UNIX_COMMAND "${NATIVEFLAG}")
|
||||
endif()
|
||||
add_compile_options(${NATIVEOPTIONS})
|
||||
set(WITH_RUNTIME_CPU_DETECTION OFF)
|
||||
else()
|
||||
message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not implemented yet on this configuration")
|
||||
set(WITH_NATIVE_INSTRUCTIONS OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Compile without functable or CPU detection
|
||||
if(NOT WITH_RUNTIME_CPU_DETECTION)
|
||||
if(MSVC AND BASEARCH_X86_FOUND)
|
||||
message(STATUS "WARNING: Microsoft Visual Studio does not support compile time detection of CPU features for \"/arch\" before \"AVX\"")
|
||||
endif()
|
||||
add_definitions(-DDISABLE_RUNTIME_CPU_DETECTION)
|
||||
endif()
|
||||
|
||||
# Force disable LTO if WITH_NATIVE_INSTRUCTIONS is not active
|
||||
if(NOT WITH_NATIVE_INSTRUCTIONS)
|
||||
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF)
|
||||
@ -1302,6 +1312,7 @@ add_feature_info(WITH_OPTIM WITH_OPTIM "Build with optimisation")
|
||||
add_feature_info(WITH_NEW_STRATEGIES WITH_NEW_STRATEGIES "Use new strategies")
|
||||
add_feature_info(WITH_NATIVE_INSTRUCTIONS WITH_NATIVE_INSTRUCTIONS
|
||||
"Instruct the compiler to use the full instruction set on this host (gcc/clang -march=native)")
|
||||
add_feature_info(WITH_RUNTIME_CPU_DETECTION WITH_RUNTIME_CPU_DETECTION "Build with runtime CPU detection")
|
||||
add_feature_info(WITH_MAINTAINER_WARNINGS WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings")
|
||||
add_feature_info(WITH_CODE_COVERAGE WITH_CODE_COVERAGE "Enable code coverage reporting")
|
||||
add_feature_info(WITH_INFLATE_STRICT WITH_INFLATE_STRICT "Build with strict inflate distance checking")
|
||||
|
29
README.md
29
README.md
@ -94,20 +94,21 @@ make test
|
||||
Build Options
|
||||
-------------
|
||||
|
||||
| CMake | configure | Description | Default |
|
||||
|:-------------------------|:-------------------------|:--------------------------------------------------------------------------------------|---------|
|
||||
| ZLIB_COMPAT | --zlib-compat | Compile with zlib compatible API | OFF |
|
||||
| ZLIB_ENABLE_TESTS | | Build test binaries | ON |
|
||||
| WITH_GZFILEOP | --without-gzfileops | Compile with support for gzFile related functions | ON |
|
||||
| WITH_OPTIM | --without-optimizations | Build with optimisations | ON |
|
||||
| WITH_NEW_STRATEGIES | --without-new-strategies | Use new strategies | ON |
|
||||
| WITH_NATIVE_INSTRUCTIONS | | Compiles with full instruction set supported on this host (gcc/clang -march=native) | OFF |
|
||||
| WITH_SANITIZER | | Build with sanitizer (memory, address, undefined) | OFF |
|
||||
| WITH_GTEST | | Build gtest_zlib | ON |
|
||||
| WITH_FUZZERS | | Build test/fuzz | OFF |
|
||||
| WITH_BENCHMARKS | | Build test/benchmarks | OFF |
|
||||
| WITH_MAINTAINER_WARNINGS | | Build with project maintainer warnings | OFF |
|
||||
| WITH_CODE_COVERAGE | | Enable code coverage reporting | OFF |
|
||||
| CMake | configure | Description | Default |
|
||||
|:---------------------------|:-------------------------|:------------------------------------------------------------------------------------|---------|
|
||||
| ZLIB_COMPAT | --zlib-compat | Compile with zlib compatible API | OFF |
|
||||
| ZLIB_ENABLE_TESTS | | Build test binaries | ON |
|
||||
| WITH_GZFILEOP | --without-gzfileops | Compile with support for gzFile related functions | ON |
|
||||
| WITH_OPTIM | --without-optimizations | Build with optimisations | ON |
|
||||
| WITH_NEW_STRATEGIES | --without-new-strategies | Use new strategies | ON |
|
||||
| WITH_NATIVE_INSTRUCTIONS | | Compiles with full instruction set supported on this host (gcc/clang -march=native) | OFF |
|
||||
| WITH_RUNTIME_CPU_DETECTION | | Compiles with runtime CPU detection | ON |
|
||||
| WITH_SANITIZER | | Build with sanitizer (memory, address, undefined) | OFF |
|
||||
| WITH_GTEST | | Build gtest_zlib | ON |
|
||||
| WITH_FUZZERS | | Build test/fuzz | OFF |
|
||||
| WITH_BENCHMARKS | | Build test/benchmarks | OFF |
|
||||
| WITH_MAINTAINER_WARNINGS | | Build with project maintainer warnings | OFF |
|
||||
| WITH_CODE_COVERAGE | | Enable code coverage reporting | OFF |
|
||||
|
||||
|
||||
Install
|
||||
|
@ -5,7 +5,6 @@
|
||||
#ifndef ARM_FUNCTIONS_H_
|
||||
#define ARM_FUNCTIONS_H_
|
||||
|
||||
|
||||
#ifdef ARM_NEON
|
||||
uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
|
||||
uint32_t chunksize_neon(void);
|
||||
@ -28,4 +27,39 @@ uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
void slide_hash_armv6(deflate_state *s);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION
|
||||
// ARM - SIMD
|
||||
# if (defined(ARM_SIMD) && defined(__ARM_FEATURE_SIMD32)) || defined(ARM_NOCHECK_SIMD)
|
||||
# undef native_slide_hash
|
||||
# define native_slide_hash slide_hash_armv6
|
||||
# endif
|
||||
// ARM - NEON
|
||||
# if (defined(ARM_NEON) && (defined(__ARM_NEON__) || defined(__ARM_NEON))) || ARM_NOCHECK_NEON
|
||||
# undef native_adler32
|
||||
# define native_adler32 adler32_neon
|
||||
# undef native_chunkmemset_safe
|
||||
# define native_chunkmemset_safe chunkmemset_safe_neon
|
||||
# undef native_chunksize
|
||||
# define native_chunksize chunksize_neon
|
||||
# undef native_inflate_fast
|
||||
# define native_inflate_fast inflate_fast_neon
|
||||
# undef native_slide_hash
|
||||
# define native_slide_hash slide_hash_neon
|
||||
# ifdef HAVE_BUILTIN_CTZLL
|
||||
# undef native_compare256
|
||||
# define native_compare256 compare256_neon
|
||||
# undef native_longest_match
|
||||
# define native_longest_match longest_match_neon
|
||||
# undef native_longest_match_slow
|
||||
# define native_longest_match_slow longest_match_slow_neon
|
||||
# endif
|
||||
# endif
|
||||
// ARM - ACLE
|
||||
# if defined(ARM_ACLE) && defined(__ARM_ACLE) && defined(__ARM_FEATURE_CRC32)
|
||||
# undef native_crc32
|
||||
# define native_crc32 crc32_acle
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif /* ARM_FUNCTIONS_H_ */
|
||||
|
@ -84,4 +84,23 @@ uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
|
||||
# define compare256_generic compare256_c
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION
|
||||
// Generic code
|
||||
# define native_adler32 adler32_c
|
||||
# define native_adler32_fold_copy adler32_fold_copy_c
|
||||
# define native_chunkmemset_safe chunkmemset_safe_c
|
||||
# define native_chunksize chunksize_c
|
||||
# define native_crc32 PREFIX(crc32_braid)
|
||||
# define native_crc32_fold crc32_fold_c
|
||||
# define native_crc32_fold_copy crc32_fold_copy_c
|
||||
# define native_crc32_fold_final crc32_fold_final_c
|
||||
# define native_crc32_fold_reset crc32_fold_reset_c
|
||||
# define native_inflate_fast inflate_fast_c
|
||||
# define native_slide_hash slide_hash_c
|
||||
# define native_longest_match longest_match_generic
|
||||
# define native_longest_match_slow longest_match_slow_generic
|
||||
# define native_compare256 compare256_generic
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -27,4 +27,41 @@ uint32_t longest_match_power9(deflate_state *const s, Pos cur_match);
|
||||
uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION
|
||||
// Power - VMX
|
||||
# if defined(PPC_VMX) && defined(__ALTIVEC__)
|
||||
# undef native_adler32
|
||||
# define native_adler32 adler32_vmx
|
||||
# undef native_slide_hash
|
||||
# define native_slide_hash slide_hash_vmx
|
||||
# endif
|
||||
// Power8 - VSX
|
||||
# if defined(POWER8_VSX) && defined(_ARCH_PWR8) && defined(__VSX__)
|
||||
# undef native_adler32
|
||||
# define native_adler32 adler32_power8
|
||||
# undef native_chunkmemset_safe
|
||||
# define native_chunkmemset_safe chunkmemset_safe_power8
|
||||
# undef native_chunksize
|
||||
# define native_chunksize chunksize_power8
|
||||
# undef native_inflate_fast
|
||||
# define native_inflate_fast inflate_fast_power8
|
||||
# undef native_slide_hash
|
||||
# define native_slide_hash slide_hash_power8
|
||||
# endif
|
||||
# if defined(POWER8_VSX_CRC32) && defined(_ARCH_PWR8) && defined(__VSX__)
|
||||
# undef native_crc32
|
||||
# define native_crc32 crc32_power8
|
||||
# endif
|
||||
// Power9
|
||||
# if defined(POWER9) && defined(_ARCH_PWR9)
|
||||
# undef native_compare256
|
||||
# define native_compare256 compare256_power9
|
||||
# undef native_longest_match
|
||||
# define native_longest_match longest_match_power9
|
||||
# undef native_longest_match_slow
|
||||
# define native_longest_match_slow longest_match_slow_power9
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif /* POWER_FUNCTIONS_H_ */
|
||||
|
@ -22,4 +22,28 @@ void slide_hash_rvv(deflate_state *s);
|
||||
void inflate_fast_rvv(PREFIX3(stream) *strm, uint32_t start);
|
||||
#endif
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION
|
||||
// RISCV - RVV
|
||||
# if defined(RISCV_RVV) && defined(__riscv_v) && defined(__linux__)
|
||||
# undef native_adler32
|
||||
# define native_adler32 adler32_rvv
|
||||
# undef native_adler32_fold_copy
|
||||
# define native_adler32_fold_copy adler32_fold_copy_rvv
|
||||
# undef native_chunkmemset_safe
|
||||
# define native_chunkmemset_safe chunkmemset_safe_rvv
|
||||
# undef native_chunksize
|
||||
# define native_chunksize chunksize_rvv
|
||||
# undef native_compare256
|
||||
# define native_compare256 compare256_rvv
|
||||
# undef native_inflate_fast
|
||||
# define native_inflate_fast inflate_fast_rvv
|
||||
# undef native_longest_match
|
||||
# define native_longest_match longest_match_rvv
|
||||
# undef native_longest_match_slow
|
||||
# define native_longest_match_slow longest_match_slow_rvv
|
||||
# undef native_slide_hash
|
||||
# define native_slide_hash slide_hash_rvv
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif /* RISCV_FUNCTIONS_H_ */
|
||||
|
@ -9,4 +9,12 @@
|
||||
uint32_t crc32_s390_vx(uint32_t crc, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION
|
||||
# if defined(S390_CRC32_VX) && defined(__zarch__) && __ARCH__ >= 11 && defined(__VX__)
|
||||
# undef native_crc32
|
||||
# define native_crc32 = crc32_s390_vx
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -67,4 +67,106 @@ uint32_t crc32_fold_vpclmulqdq_final(crc32_fold *crc);
|
||||
uint32_t crc32_vpclmulqdq(uint32_t crc32, const uint8_t *buf, size_t len);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION
|
||||
// X86 - SSE2
|
||||
# if (defined(X86_SSE2) && defined(__SSE2__)) || defined(__x86_64__) || defined(_M_X64) || defined(X86_NOCHECK_SSE2)
|
||||
# undef native_chunkmemset_safe
|
||||
# define native_chunkmemset_safe chunkmemset_safe_sse2
|
||||
# undef native_chunksize
|
||||
# define native_chunksize chunksize_sse2
|
||||
# undef native_inflate_fast
|
||||
# define native_inflate_fast inflate_fast_sse2
|
||||
# undef native_slide_hash
|
||||
# define native_slide_hash slide_hash_sse2
|
||||
# ifdef HAVE_BUILTIN_CTZ
|
||||
# undef native_compare256
|
||||
# define native_compare256 compare256_sse2
|
||||
# undef native_longest_match
|
||||
# define native_longest_match longest_match_sse2
|
||||
# undef native_longest_match_slow
|
||||
# define native_longest_match_slow longest_match_slow_sse2
|
||||
# endif
|
||||
#endif
|
||||
// X86 - SSSE3
|
||||
# if defined(X86_SSSE3) && defined(__SSSE3__)
|
||||
# undef native_adler32
|
||||
# define native_adler32 adler32_ssse3
|
||||
# undef native_chunkmemset_safe
|
||||
# define native_chunkmemset_safe chunkmemset_safe_ssse3
|
||||
# undef native_inflate_fast
|
||||
# define native_inflate_fast inflate_fast_ssse3
|
||||
# endif
|
||||
// X86 - SSE4.2
|
||||
# if defined(X86_SSE42) && defined(__SSE4_2__)
|
||||
# undef native_adler32_fold_copy
|
||||
# define native_adler32_fold_copy adler32_fold_copy_sse42
|
||||
# endif
|
||||
|
||||
// X86 - PCLMUL
|
||||
#if defined(X86_PCLMULQDQ_CRC) && defined(__PCLMUL__)
|
||||
# undef native_crc32
|
||||
# define native_crc32 crc32_pclmulqdq
|
||||
# undef native_crc32_fold
|
||||
# define native_crc32_fold crc32_fold_pclmulqdq
|
||||
# undef native_crc32_fold_copy
|
||||
# define native_crc32_fold_copy crc32_fold_pclmulqdq_copy
|
||||
# undef native_crc32_fold_final
|
||||
# define native_crc32_fold_final crc32_fold_pclmulqdq_final
|
||||
# undef native_crc32_fold_reset
|
||||
# define native_crc32_fold_reset crc32_fold_pclmulqdq_reset
|
||||
#endif
|
||||
// X86 - AVX
|
||||
# if defined(X86_AVX2) && defined(__AVX2__)
|
||||
# undef native_adler32
|
||||
# define native_adler32 adler32_avx2
|
||||
# undef native_adler32_fold_copy
|
||||
# define native_adler32_fold_copy adler32_fold_copy_avx2
|
||||
# undef native_chunkmemset_safe
|
||||
# define native_chunkmemset_safe chunkmemset_safe_avx2
|
||||
# undef native_chunksize
|
||||
# define native_chunksize chunksize_avx2
|
||||
# undef native_inflate_fast
|
||||
# define native_inflate_fast inflate_fast_avx2
|
||||
# undef native_slide_hash
|
||||
# define native_slide_hash slide_hash_avx2
|
||||
# ifdef HAVE_BUILTIN_CTZ
|
||||
# undef native_compare256
|
||||
# define native_compare256 compare256_avx2
|
||||
# undef native_longest_match
|
||||
# define native_longest_match longest_match_avx2
|
||||
# undef native_longest_match_slow
|
||||
# define native_longest_match_slow longest_match_slow_avx2
|
||||
# endif
|
||||
# endif
|
||||
|
||||
// X86 - AVX512 (F,DQ,BW,Vl)
|
||||
# if defined(X86_AVX512) && defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VL__)
|
||||
# undef native_adler32
|
||||
# define native_adler32 adler32_avx512
|
||||
# undef native_adler32_fold_copy
|
||||
# define native_adler32_fold_copy adler32_fold_copy_avx512
|
||||
// X86 - AVX512 (VNNI)
|
||||
# if defined(X86_AVX512VNNI) && defined(__AVX512VNNI__)
|
||||
# undef native_adler32
|
||||
# define native_adler32 adler32_avx512_vnni
|
||||
# undef native_adler32_fold_copy
|
||||
# define native_adler32_fold_copy adler32_fold_copy_avx512_vnni
|
||||
# endif
|
||||
// X86 - VPCLMULQDQ
|
||||
# if defined(__PCLMUL__) && defined(__AVX512F__) && defined(__VPCLMULQDQ__)
|
||||
# undef native_crc32
|
||||
# define native_crc32 crc32_vpclmulqdq
|
||||
# undef native_crc32_fold
|
||||
# define native_crc32_fold crc32_fold_vpclmulqdq
|
||||
# undef native_crc32_fold_copy
|
||||
# define native_crc32_fold_copy crc32_fold_vpclmulqdq_copy
|
||||
# undef native_crc32_fold_final
|
||||
# define native_crc32_fold_final crc32_fold_vpclmulqdq_final
|
||||
# undef native_crc32_fold_reset
|
||||
# define native_crc32_fold_reset crc32_fold_vpclmulqdq_reset
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif /* X86_FUNCTIONS_H_ */
|
||||
|
@ -190,8 +190,10 @@ int32_t ZNG_CONDEXPORT PREFIX(deflateInit2)(PREFIX3(stream) *strm, int32_t level
|
||||
deflate_state *s;
|
||||
int wrap = 1;
|
||||
|
||||
#ifndef DISABLE_RUNTIME_CPU_DETECTION
|
||||
/* Force initialization functable, because deflate captures function pointers from functable. */
|
||||
functable.force_init();
|
||||
#endif
|
||||
|
||||
if (strm == NULL)
|
||||
return Z_STREAM_ERROR;
|
||||
|
@ -2,6 +2,7 @@
|
||||
* Copyright (C) 2017 Hans Kristian Rosbach
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
#ifndef DISABLE_RUNTIME_CPU_DETECTION
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "functable.h"
|
||||
@ -122,6 +123,7 @@ static void init_functable(void) {
|
||||
# endif
|
||||
}
|
||||
#endif
|
||||
// X86 - AVX512 (F,DQ,BW,Vl)
|
||||
#ifdef X86_AVX512
|
||||
if (cf.x86.has_avx512) {
|
||||
ft.adler32 = &adler32_avx512;
|
||||
@ -348,3 +350,5 @@ Z_INTERNAL struct functable_s functable = {
|
||||
longest_match_slow_stub,
|
||||
slide_hash_stub,
|
||||
};
|
||||
|
||||
#endif
|
||||
|
20
functable.h
20
functable.h
@ -9,6 +9,19 @@
|
||||
#include "deflate.h"
|
||||
#include "crc32.h"
|
||||
|
||||
#ifdef DISABLE_RUNTIME_CPU_DETECTION
|
||||
|
||||
# include "arch_functions.h"
|
||||
|
||||
/* When compiling with native instructions it is not necessary to use functable.
|
||||
* Instead we use native_ macro indicating the best available variant of arch-specific
|
||||
* functions for the current platform.
|
||||
*/
|
||||
# define FUNCTABLE_CALL(name) native_ ## name
|
||||
# define FUNCTABLE_FPTR(name) &native_ ## name
|
||||
|
||||
#else
|
||||
|
||||
struct functable_s {
|
||||
void (* force_init) (void);
|
||||
uint32_t (* adler32) (uint32_t adler, const uint8_t *buf, size_t len);
|
||||
@ -32,8 +45,9 @@ Z_INTERNAL extern struct functable_s functable;
|
||||
|
||||
/* Explicitly indicate functions are conditionally dispatched.
|
||||
*/
|
||||
#define FUNCTABLE_CALL(name) functable.name
|
||||
#define FUNCTABLE_FPTR(name) functable.name
|
||||
|
||||
# define FUNCTABLE_CALL(name) functable.name
|
||||
# define FUNCTABLE_FPTR(name) functable.name
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -139,8 +139,10 @@ int32_t ZNG_CONDEXPORT PREFIX(inflateInit2)(PREFIX3(stream) *strm, int32_t windo
|
||||
int32_t ret;
|
||||
struct inflate_state *state;
|
||||
|
||||
#ifndef DISABLE_RUNTIME_CPU_DETECTION
|
||||
/* Initialize functable earlier. */
|
||||
functable.force_init();
|
||||
#endif
|
||||
|
||||
if (strm == NULL)
|
||||
return Z_STREAM_ERROR;
|
||||
|
Loading…
Reference in New Issue
Block a user