Compare commits

...

14 Commits

Author SHA1 Message Date
JordanTheToaster
4ad5dde780 3rdparty: Sync cpuinfo to commit 8df44962d437a0477f07ba6b8843d0b6a48646a4 2024-11-04 12:41:54 +01:00
JordanTheToaster
0bc043a0bb 3rdparty: Update LZMA/7zipSDK to 24.08 2024-11-04 12:41:54 +01:00
JordanTheToaster
c936b7db29 Deps: Update SDL to 2.30.9 2024-11-04 12:41:54 +01:00
JordanTheToaster
2793c49694 Deps: Update Freetype to 2.13.3 2024-11-04 12:41:54 +01:00
JordanTheToaster
c578859122 3rdparty: Update xbyak to 7.21 2024-11-04 12:41:54 +01:00
JordanTheToaster
43e6f06a9e Deps: Update libpng to 1.6.44 2024-11-04 12:41:54 +01:00
JordanTheToaster
cf8e9f773e Deps: Update Harfbuzz to 10.0.1 2024-11-04 12:41:54 +01:00
PCSX2 Bot
ed3bdc61f4 Qt: Update Base Translation 2024-11-04 12:40:42 +01:00
TheLastRar
e57f7d92c3 FSUI: Don't move IMGUI cursor unnecessarily in DrawGameGrid() 2024-11-04 12:40:31 +01:00
Gd7
b95eb5cd74 GameDB: Final Fantasy X Optimal FPU config. (#11958)
Fixes the vast majority of FFX's FPU glitches and a major cutscene bug, without breaking anything beyond what the current config already breaks.

EE clamping fixes reversed controls and characters and enemies facing the wrong way during battles; everything else is affected by EE rounding and EE division rounding.

VU0 clamping affects nothing.
2024-11-04 12:40:16 +01:00
KamFretoZ
b865bbb0d3 CI: Add ci skip to base translation update workflow 2024-11-04 12:39:04 +01:00
KamFretoZ
ca6e1c9a46 Qt: Cleanup Unused InputRec Menu 2024-11-04 12:39:04 +01:00
KamFretoZ
ea8a2deae6 Qt: Icon Tweaks 2024-11-04 12:39:04 +01:00
KamFretoZ
c67237672c Qt/Savestate: Add option to disable savestate selector UI 2024-11-04 12:39:04 +01:00
86 changed files with 4280 additions and 2217 deletions

View File

@@ -20,7 +20,7 @@ jobs:
uses: peter-evans/create-pull-request@v7
with:
title: "Qt: Update Base Translation"
commit-message: "Qt: Update Base Translation"
commit-message: "[ci skip]"
committer: "PCSX2 Bot <PCSX2Bot@users.noreply.github.com>"
author: "PCSX2 Bot <PCSX2Bot@users.noreply.github.com>"
body: "Daily update of base translation sources."

View File

@@ -16,10 +16,10 @@ fi
LIBBACKTRACE=ad106d5fdd5d960bd33fae1c48a351af567fd075
LIBJPEG=9f
LIBPNG=1.6.43
LIBPNG=1.6.44
LIBWEBP=1.4.0
LZ4=b8fd2d15309dd4e605070bd4486e26b6ef814e29
SDL=SDL2-2.30.8
SDL=SDL2-2.30.9
QT=6.8.0
ZSTD=1.5.6
@@ -34,10 +34,10 @@ cd deps-build
cat > SHASUMS <<EOF
fd6f417fe9e3a071cf1424a5152d926a34c4a3c5070745470be6cf12a404ed79 $LIBBACKTRACE.zip
04705c110cb2469caa79fb71fba3d7bf834914706e9641a4589485c1f832565b jpegsrc.v$LIBJPEG.tar.gz
6a5ca0652392a2d7c9db2ae5b40210843c0bbc081cbd410825ab00cc59f14a6c libpng-$LIBPNG.tar.xz
60c4da1d5b7f0aa8d158da48e8f8afa9773c1c8baa5d21974df61f1886b8ce8e libpng-$LIBPNG.tar.xz
61f873ec69e3be1b99535634340d5bde750b2e4447caa1db9f61be3fd49ab1e5 libwebp-$LIBWEBP.tar.gz
0728800155f3ed0a0c87e03addbd30ecbe374f7b080678bbca1506051d50dec3 $LZ4.tar.gz
380c295ea76b9bd72d90075793971c8bcb232ba0a69a9b14da4ae8f603350058 $SDL.tar.gz
24b574f71c87a763f50704bbb630cbe38298d544a1f890f099a4696b1d6beba4 $SDL.tar.gz
8c29e06cf42aacc1eafc4077ae2ec6c6fcb96a626157e0593d5e82a34fd403c1 zstd-$ZSTD.tar.gz
1bad481710aa27f872de6c9f72651f89a6107f0077003d0ebfcc9fd15cba3c75 qtbase-everywhere-src-$QT.tar.xz
595bf8557b91e1f8ebc726f1e09868a3c7e610ff5045068f2d4ea2428c49a5d4 qtimageformats-everywhere-src-$QT.tar.xz

View File

@@ -14,8 +14,8 @@
"sources": [
{
"type": "archive",
"url": "https://libsdl.org/release/SDL2-2.30.8.tar.gz",
"sha256": "380c295ea76b9bd72d90075793971c8bcb232ba0a69a9b14da4ae8f603350058"
"url": "https://libsdl.org/release/SDL2-2.30.9.tar.gz",
"sha256": "24b574f71c87a763f50704bbb630cbe38298d544a1f890f099a4696b1d6beba4"
}
],
"cleanup": [

View File

@@ -38,12 +38,12 @@ if [ "${INSTALLDIR:0:1}" != "/" ]; then
INSTALLDIR="$PWD/$INSTALLDIR"
fi
FREETYPE=2.13.2
HARFBUZZ=8.3.1
SDL=SDL2-2.30.8
FREETYPE=2.13.3
HARFBUZZ=10.0.1
SDL=SDL2-2.30.9
ZSTD=1.5.6
LZ4=b8fd2d15309dd4e605070bd4486e26b6ef814e29
LIBPNG=1.6.43
LIBPNG=1.6.44
LIBJPEG=9f
LIBWEBP=1.4.0
FFMPEG=6.0
@@ -74,12 +74,12 @@ CMAKE_ARCH_ARM64=-DCMAKE_OSX_ARCHITECTURES="arm64"
CMAKE_ARCH_UNIVERSAL=-DCMAKE_OSX_ARCHITECTURES="x86_64;arm64"
cat > SHASUMS <<EOF
12991c4e55c506dd7f9b765933e62fd2be2e06d421505d7950a132e4f1bb484d freetype-$FREETYPE.tar.xz
19a54fe9596f7a47c502549fce8e8a10978c697203774008cc173f8360b19a9a harfbuzz-$HARFBUZZ.tar.gz
380c295ea76b9bd72d90075793971c8bcb232ba0a69a9b14da4ae8f603350058 $SDL.tar.gz
0550350666d427c74daeb85d5ac7bb353acba5f76956395995311a9c6f063289 freetype-$FREETYPE.tar.xz
e7358ea86fe10fb9261931af6f010d4358dac64f7074420ca9bc94aae2bdd542 harfbuzz-$HARFBUZZ.tar.gz
24b574f71c87a763f50704bbb630cbe38298d544a1f890f099a4696b1d6beba4 $SDL.tar.gz
8c29e06cf42aacc1eafc4077ae2ec6c6fcb96a626157e0593d5e82a34fd403c1 zstd-$ZSTD.tar.gz
0728800155f3ed0a0c87e03addbd30ecbe374f7b080678bbca1506051d50dec3 $LZ4.tar.gz
6a5ca0652392a2d7c9db2ae5b40210843c0bbc081cbd410825ab00cc59f14a6c libpng-$LIBPNG.tar.xz
60c4da1d5b7f0aa8d158da48e8f8afa9773c1c8baa5d21974df61f1886b8ce8e libpng-$LIBPNG.tar.xz
61f873ec69e3be1b99535634340d5bde750b2e4447caa1db9f61be3fd49ab1e5 libwebp-$LIBWEBP.tar.gz
04705c110cb2469caa79fb71fba3d7bf834914706e9641a4589485c1f832565b jpegsrc.v$LIBJPEG.tar.gz
57be87c22d9b49c112b6d24bc67d42508660e6b718b3db89c44e47e289137082 ffmpeg-$FFMPEG.tar.xz

View File

@@ -20,12 +20,12 @@ if [ "${INSTALLDIR:0:1}" != "/" ]; then
INSTALLDIR="$PWD/$INSTALLDIR"
fi
FREETYPE=2.13.2
HARFBUZZ=8.3.1
SDL=SDL2-2.30.8
FREETYPE=2.13.3
HARFBUZZ=10.0.1
SDL=SDL2-2.30.9
ZSTD=1.5.6
LZ4=b8fd2d15309dd4e605070bd4486e26b6ef814e29
LIBPNG=1.6.43
LIBPNG=1.6.44
LIBJPEG=9f
LIBWEBP=1.4.0
FFMPEG=6.0
@@ -54,12 +54,12 @@ CMAKE_COMMON=(
)
cat > SHASUMS <<EOF
12991c4e55c506dd7f9b765933e62fd2be2e06d421505d7950a132e4f1bb484d freetype-$FREETYPE.tar.xz
19a54fe9596f7a47c502549fce8e8a10978c697203774008cc173f8360b19a9a harfbuzz-$HARFBUZZ.tar.gz
380c295ea76b9bd72d90075793971c8bcb232ba0a69a9b14da4ae8f603350058 $SDL.tar.gz
0550350666d427c74daeb85d5ac7bb353acba5f76956395995311a9c6f063289 freetype-$FREETYPE.tar.xz
e7358ea86fe10fb9261931af6f010d4358dac64f7074420ca9bc94aae2bdd542 harfbuzz-$HARFBUZZ.tar.gz
24b574f71c87a763f50704bbb630cbe38298d544a1f890f099a4696b1d6beba4 $SDL.tar.gz
8c29e06cf42aacc1eafc4077ae2ec6c6fcb96a626157e0593d5e82a34fd403c1 zstd-$ZSTD.tar.gz
0728800155f3ed0a0c87e03addbd30ecbe374f7b080678bbca1506051d50dec3 $LZ4.tar.gz
6a5ca0652392a2d7c9db2ae5b40210843c0bbc081cbd410825ab00cc59f14a6c libpng-$LIBPNG.tar.xz
60c4da1d5b7f0aa8d158da48e8f8afa9773c1c8baa5d21974df61f1886b8ce8e libpng-$LIBPNG.tar.xz
61f873ec69e3be1b99535634340d5bde750b2e4447caa1db9f61be3fd49ab1e5 libwebp-$LIBWEBP.tar.gz
04705c110cb2469caa79fb71fba3d7bf834914706e9641a4589485c1f832565b jpegsrc.v$LIBJPEG.tar.gz
57be87c22d9b49c112b6d24bc67d42508660e6b718b3db89c44e47e289137082 ffmpeg-$FFMPEG.tar.xz

View File

@@ -42,14 +42,14 @@ echo INSTALLDIR=%INSTALLDIR%
cd "%BUILDDIR%"
set FREETYPE=2.13.2
set HARFBUZZ=8.3.1
set FREETYPE=2.13.3
set HARFBUZZ=10.0.1
set LIBJPEG=9f
set LIBPNG=1643
set LZ4=b8fd2d15309dd4e605070bd4486e26b6ef814e29
set QT=6.8.0
set QTMINOR=6.8
set SDL=SDL2-2.30.8
set SDL=SDL2-2.30.9
set WEBP=1.4.0
set ZLIB=1.3.1
set ZLIBSHORT=131
@@ -60,13 +60,13 @@ set SHADERC_GLSLANG=142052fa30f9eca191aa9dcf65359fcaed09eeec
set SHADERC_SPIRVHEADERS=5e3ad389ee56fca27c9705d093ae5387ce404df4
set SHADERC_SPIRVTOOLS=dd4b663e13c07fea4fbb3f70c1c91c86731099f7
call :downloadfile "freetype-%FREETYPE%.tar.gz" https://sourceforge.net/projects/freetype/files/freetype2/%FREETYPE%/freetype-%FREETYPE%.tar.gz/download 1ac27e16c134a7f2ccea177faba19801131116fd682efc1f5737037c5db224b5 || goto error
call :downloadfile "harfbuzz-%HARFBUZZ%.zip" https://github.com/harfbuzz/harfbuzz/archive/refs/tags/%HARFBUZZ%.zip b2bc56184ae37324bc4829fde7d3f9e6916866ad711ee85792e457547c9fd127 || goto error
call :downloadfile "freetype-%FREETYPE%.tar.gz" https://sourceforge.net/projects/freetype/files/freetype2/%FREETYPE%/freetype-%FREETYPE%.tar.gz/download 5c3a8e78f7b24c20b25b54ee575d6daa40007a5f4eea2845861c3409b3021747 || goto error
call :downloadfile "harfbuzz-%HARFBUZZ%.zip" https://github.com/harfbuzz/harfbuzz/archive/refs/tags/%HARFBUZZ%.zip 8adf9f5a4b6022aa2744f45c89ce347df46fea8403e99f01d650b11c417d0aa8 || goto error
call :downloadfile "lpng%LIBPNG%.zip" https://download.sourceforge.net/libpng/lpng1643.zip fc466a1e638e635d6c66363bdf3f38555b81b0141d0b06ba45b49ccca327436d || goto error
call :downloadfile "jpegsr%LIBJPEG%.zip" https://ijg.org/files/jpegsr%LIBJPEG%.zip 6255da8c89e09d694e6800688c76145eb6870a76ac0d36c74fccd61b3940aafa || goto error
call :downloadfile "libwebp-%WEBP%.tar.gz" "https://storage.googleapis.com/downloads.webmproject.org/releases/webp/libwebp-%WEBP%.tar.gz" 61f873ec69e3be1b99535634340d5bde750b2e4447caa1db9f61be3fd49ab1e5 || goto error
call :downloadfile "lz4-%LZ4%.zip" "https://github.com/lz4/lz4/archive/%LZ4%.zip" 0c33119688d6b180c7e760b0acd70059222389cfd581632623784bee27e51a31 || goto error
call :downloadfile "%SDL%.zip" "https://libsdl.org/release/%SDL%.zip" abe2921dffcb25d39d270454810b211a9f47be3e5e802bc45e7d058f286a325e || goto error
call :downloadfile "%SDL%.zip" "https://libsdl.org/release/%SDL%.zip" ec855bcd815b4b63d0c958c42c2923311c656227d6e0c1ae1e721406d346444b || goto error
call :downloadfile "qtbase-everywhere-src-%QT%.zip" "https://download.qt.io/official_releases/qt/%QTMINOR%/%QT%/submodules/qtbase-everywhere-src-%QT%.zip" c3b41915341d853b6374cf93f1fcced2c8e4be9360f29c656960e1d0d15046a3 || goto error
call :downloadfile "qtimageformats-everywhere-src-%QT%.zip" "https://download.qt.io/official_releases/qt/%QTMINOR%/%QT%/submodules/qtimageformats-everywhere-src-%QT%.zip" 809081a7bdf7e48262fbe9437e4e756df6ad2649433e803c4040026e650d7c91 || goto error
call :downloadfile "qtsvg-everywhere-src-%QT%.zip" "https://download.qt.io/official_releases/qt/%QTMINOR%/%QT%/submodules/qtsvg-everywhere-src-%QT%.zip" 89f1ef4595f68c3d34c63a7c1c4ce475e701e103f0473f3fd0718a2e5234de6e || goto error

View File

@@ -40,14 +40,14 @@ set "PATH=%PATH%;%INSTALLDIR%\bin"
cd "%BUILDDIR%"
set FREETYPE=2.13.2
set HARFBUZZ=8.3.1
set FREETYPE=2.13.3
set HARFBUZZ=10.0.1
set LIBJPEG=9f
set LIBPNG=1643
set LZ4=b8fd2d15309dd4e605070bd4486e26b6ef814e29
set QT=6.8.0
set QTMINOR=6.8
set SDL=SDL2-2.30.8
set SDL=SDL2-2.30.9
set WEBP=1.4.0
set ZLIB=1.3.1
set ZLIBSHORT=131
@@ -58,13 +58,13 @@ set SHADERC_GLSLANG=142052fa30f9eca191aa9dcf65359fcaed09eeec
set SHADERC_SPIRVHEADERS=5e3ad389ee56fca27c9705d093ae5387ce404df4
set SHADERC_SPIRVTOOLS=dd4b663e13c07fea4fbb3f70c1c91c86731099f7
call :downloadfile "freetype-%FREETYPE%.tar.gz" https://sourceforge.net/projects/freetype/files/freetype2/%FREETYPE%/freetype-%FREETYPE%.tar.gz/download 1ac27e16c134a7f2ccea177faba19801131116fd682efc1f5737037c5db224b5 || goto error
call :downloadfile "harfbuzz-%HARFBUZZ%.zip" https://github.com/harfbuzz/harfbuzz/archive/refs/tags/%HARFBUZZ%.zip b2bc56184ae37324bc4829fde7d3f9e6916866ad711ee85792e457547c9fd127 || goto error
call :downloadfile "freetype-%FREETYPE%.tar.gz" https://sourceforge.net/projects/freetype/files/freetype2/%FREETYPE%/freetype-%FREETYPE%.tar.gz/download 5c3a8e78f7b24c20b25b54ee575d6daa40007a5f4eea2845861c3409b3021747 || goto error
call :downloadfile "harfbuzz-%HARFBUZZ%.zip" https://github.com/harfbuzz/harfbuzz/archive/refs/tags/%HARFBUZZ%.zip 8adf9f5a4b6022aa2744f45c89ce347df46fea8403e99f01d650b11c417d0aa8 || goto error
call :downloadfile "lpng%LIBPNG%.zip" https://download.sourceforge.net/libpng/lpng1643.zip fc466a1e638e635d6c66363bdf3f38555b81b0141d0b06ba45b49ccca327436d || goto error
call :downloadfile "jpegsr%LIBJPEG%.zip" https://ijg.org/files/jpegsr%LIBJPEG%.zip 6255da8c89e09d694e6800688c76145eb6870a76ac0d36c74fccd61b3940aafa || goto error
call :downloadfile "libwebp-%WEBP%.tar.gz" "https://storage.googleapis.com/downloads.webmproject.org/releases/webp/libwebp-%WEBP%.tar.gz" 61f873ec69e3be1b99535634340d5bde750b2e4447caa1db9f61be3fd49ab1e5 || goto error
call :downloadfile "lz4-%LZ4%.zip" "https://github.com/lz4/lz4/archive/%LZ4%.zip" 0c33119688d6b180c7e760b0acd70059222389cfd581632623784bee27e51a31 || goto error
call :downloadfile "%SDL%.zip" "https://libsdl.org/release/%SDL%.zip" abe2921dffcb25d39d270454810b211a9f47be3e5e802bc45e7d058f286a325e || goto error
call :downloadfile "%SDL%.zip" "https://libsdl.org/release/%SDL%.zip" ec855bcd815b4b63d0c958c42c2923311c656227d6e0c1ae1e721406d346444b || goto error
call :downloadfile "qtbase-everywhere-src-%QT%.zip" "https://download.qt.io/official_releases/qt/%QTMINOR%/%QT%/submodules/qtbase-everywhere-src-%QT%.zip" c3b41915341d853b6374cf93f1fcced2c8e4be9360f29c656960e1d0d15046a3 || goto error
call :downloadfile "qtimageformats-everywhere-src-%QT%.zip" "https://download.qt.io/official_releases/qt/%QTMINOR%/%QT%/submodules/qtimageformats-everywhere-src-%QT%.zip" 809081a7bdf7e48262fbe9437e4e756df6ad2649433e803c4040026e650d7c91 || goto error
call :downloadfile "qtsvg-everywhere-src-%QT%.zip" "https://download.qt.io/official_releases/qt/%QTMINOR%/%QT%/submodules/qtsvg-everywhere-src-%QT%.zip" 89f1ef4595f68c3d34c63a7c1c4ce475e701e103f0473f3fd0718a2e5234de6e || goto error

View File

@@ -60,7 +60,7 @@ ssize_t CPUINFO_ABI cpuinfo_mock_read(int fd, void* buffer, size_t capacity);
void CPUINFO_ABI cpuinfo_set_hwcap(uint32_t hwcap);
#endif
#if CPUINFO_ARCH_ARM
void CPUINFO_ABI cpuinfo_set_hwcap2(uint32_t hwcap2);
void CPUINFO_ABI cpuinfo_set_hwcap2(uint64_t hwcap2);
#endif
#endif

View File

@@ -496,13 +496,19 @@ enum cpuinfo_uarch {
cpuinfo_uarch_cortex_x2 = 0x00300502,
/** ARM Cortex-X3. */
cpuinfo_uarch_cortex_x3 = 0x00300503,
/** ARM Cortex-X4. */
cpuinfo_uarch_cortex_x4 = 0x00300504,
/** ARM Cortex-A510. */
cpuinfo_uarch_cortex_a510 = 0x00300551,
/** ARM Cortex-A520. */
cpuinfo_uarch_cortex_a520 = 0x00300552,
/** ARM Cortex-A710. */
cpuinfo_uarch_cortex_a710 = 0x00300571,
/** ARM Cortex-A715. */
cpuinfo_uarch_cortex_a715 = 0x00300572,
/** ARM Cortex-A720. */
cpuinfo_uarch_cortex_a720 = 0x00300573,
/** Qualcomm Scorpion. */
cpuinfo_uarch_scorpion = 0x00400100,
@@ -1664,6 +1670,14 @@ struct cpuinfo_arm_isa {
bool sve;
bool sve2;
bool i8mm;
bool sme;
bool sme2;
bool sme2p1;
bool sme_i16i32;
bool sme_bi32i32;
bool sme_b16b16;
bool sme_f16f16;
uint32_t svelen;
#endif
bool rdm;
bool fp16arith;
@@ -2036,6 +2050,71 @@ static inline bool cpuinfo_has_arm_sve2(void) {
#endif
}
// Function to get the max SVE vector length on ARM CPUs which support SVE.
static inline uint32_t cpuinfo_get_max_arm_sve_length(void) {
#if CPUINFO_ARCH_ARM64
return cpuinfo_isa.svelen * 8; // bytes * 8 = bit length(vector length)
#else
return 0;
#endif
}
static inline bool cpuinfo_has_arm_sme(void) {
#if CPUINFO_ARCH_ARM64
return cpuinfo_isa.sme;
#else
return false;
#endif
}
static inline bool cpuinfo_has_arm_sme2(void) {
#if CPUINFO_ARCH_ARM64
return cpuinfo_isa.sme2;
#else
return false;
#endif
}
static inline bool cpuinfo_has_arm_sme2p1(void) {
#if CPUINFO_ARCH_ARM64
return cpuinfo_isa.sme2p1;
#else
return false;
#endif
}
static inline bool cpuinfo_has_arm_sme_i16i32(void) {
#if CPUINFO_ARCH_ARM64
return cpuinfo_isa.sme_i16i32;
#else
return false;
#endif
}
static inline bool cpuinfo_has_arm_sme_bi32i32(void) {
#if CPUINFO_ARCH_ARM64
return cpuinfo_isa.sme_bi32i32;
#else
return false;
#endif
}
static inline bool cpuinfo_has_arm_sme_b16b16(void) {
#if CPUINFO_ARCH_ARM64
return cpuinfo_isa.sme_b16b16;
#else
return false;
#endif
}
static inline bool cpuinfo_has_arm_sme_f16f16(void) {
#if CPUINFO_ARCH_ARM64
return cpuinfo_isa.sme_f16f16;
#else
return false;
#endif
}
#if CPUINFO_ARCH_RISCV32 || CPUINFO_ARCH_RISCV64
/* This structure is not a part of stable API. Use cpuinfo_has_riscv_* functions
* instead. */
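
The SME/SVE accessors added above follow cpuinfo's existing per-feature pattern: outside CPUINFO_ARCH_ARM64 each helper collapses to a constant false/0, so callers need no #ifdefs. A minimal usage sketch in C, assuming only the long-standing cpuinfo_initialize()/cpuinfo_deinitialize() entry points from the same header (they are not part of this diff):

#include <stdio.h>
#include <cpuinfo.h>

int main(void) {
    /* cpuinfo_initialize() must run before any feature query. */
    if (!cpuinfo_initialize()) {
        fprintf(stderr, "cpuinfo initialization failed\n");
        return 1;
    }
    /* Helpers added in this sync; they return false/0 on non-ARM64 builds. */
    printf("SVE: %d, max SVE length: %u bits\n",
           cpuinfo_has_arm_sve(), cpuinfo_get_max_arm_sve_length());
    printf("SME: %d, SME2: %d\n",
           cpuinfo_has_arm_sme(), cpuinfo_has_arm_sme2());
    cpuinfo_deinitialize();
    return 0;
}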

View File

@@ -64,6 +64,7 @@ enum cpuinfo_arm_chipset_series {
cpuinfo_arm_chipset_series_telechips_tcc,
cpuinfo_arm_chipset_series_texas_instruments_omap,
cpuinfo_arm_chipset_series_unisoc_t,
cpuinfo_arm_chipset_series_unisoc_ums,
cpuinfo_arm_chipset_series_wondermedia_wm,
cpuinfo_arm_chipset_series_max,
};

View File

@@ -24,7 +24,7 @@ void cpuinfo_set_wcid(uint32_t wcid) {
void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
uint32_t features,
uint32_t features2,
uint64_t features2,
uint32_t midr,
uint32_t architecture_version,
uint32_t architecture_flags,
@@ -147,6 +147,8 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
"VDOT instructions disabled: cause occasional SIGILL on Spreadtrum SC9863A");
} else if (chipset->series == cpuinfo_arm_chipset_series_unisoc_t && chipset->model == 310) {
cpuinfo_log_warning("VDOT instructions disabled: cause occasional SIGILL on Unisoc T310");
} else if (chipset->series == cpuinfo_arm_chipset_series_unisoc_ums && chipset->model == 312) {
cpuinfo_log_warning("VDOT instructions disabled: cause occasional SIGILL on Unisoc UMS312");
} else {
switch (midr & (CPUINFO_ARM_MIDR_IMPLEMENTER_MASK | CPUINFO_ARM_MIDR_PART_MASK)) {
case UINT32_C(0x4100D0B0): /* Cortex-A76 */

View File

@@ -3,9 +3,11 @@
#include <arm/linux/api.h>
#include <cpuinfo/log.h>
#include <sys/prctl.h>
void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
uint32_t features,
uint32_t features2,
uint64_t features2,
uint32_t midr,
const struct cpuinfo_arm_chipset chipset[restrict static 1],
struct cpuinfo_arm_isa isa[restrict static 1]) {
@@ -142,6 +144,27 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SVE2) {
isa->sve2 = true;
}
if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME) {
isa->sme = true;
}
if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME2) {
isa->sme2 = true;
}
if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME2P1) {
isa->sme2p1 = true;
}
if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME_I16I32) {
isa->sme_i16i32 = true;
}
if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME_BI32I32) {
isa->sme_bi32i32 = true;
}
if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME_B16B16) {
isa->sme_b16b16 = true;
}
if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SME_F16F16) {
isa->sme_f16f16 = true;
}
// SVEBF16 is set iff SVE and BF16 are both supported, but the SVEBF16
// feature flag was added in Linux kernel before the BF16 feature flag,
// so we check for either.
@@ -151,4 +174,21 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
if (features & CPUINFO_ARM_LINUX_FEATURE_ASIMDFHM) {
isa->fhm = true;
}
#ifndef PR_SVE_GET_VL
#define PR_SVE_GET_VL 51
#endif
#ifndef PR_SVE_VL_LEN_MASK
#define PR_SVE_VL_LEN_MASK 0xffff
#endif
int ret = prctl(PR_SVE_GET_VL);
if (ret < 0) {
cpuinfo_log_warning("No SVE support on this machine");
isa->svelen = 0; // Assume no SVE support if the call fails
} else {
// Mask out the SVE vector length bits
isa->svelen = ret & PR_SVE_VL_LEN_MASK;
}
}

View File

@@ -137,6 +137,13 @@ struct cpuinfo_arm_linux_proc_cpuinfo_cache {
#define CPUINFO_ARM_LINUX_FEATURE2_DGH UINT32_C(0x00008000)
#define CPUINFO_ARM_LINUX_FEATURE2_RNG UINT32_C(0x00010000)
#define CPUINFO_ARM_LINUX_FEATURE2_BTI UINT32_C(0x00020000)
#define CPUINFO_ARM_LINUX_FEATURE2_SME UINT32_C(0x00800000)
#define CPUINFO_ARM_LINUX_FEATURE2_SME2 UINT64_C(0x0000002000000000)
#define CPUINFO_ARM_LINUX_FEATURE2_SME2P1 UINT64_C(0x0000004000000000)
#define CPUINFO_ARM_LINUX_FEATURE2_SME_I16I32 UINT64_C(0x0000008000000000)
#define CPUINFO_ARM_LINUX_FEATURE2_SME_BI32I32 UINT64_C(0x0000010000000000)
#define CPUINFO_ARM_LINUX_FEATURE2_SME_B16B16 UINT64_C(0x0000020000000000)
#define CPUINFO_ARM_LINUX_FEATURE2_SME_F16F16 UINT64_C(0x0000040000000000)
#endif
#define CPUINFO_ARM_LINUX_VALID_ARCHITECTURE UINT32_C(0x00010000)
@@ -172,7 +179,7 @@ struct cpuinfo_arm_linux_processor {
struct cpuinfo_arm_linux_proc_cpuinfo_cache proc_cpuinfo_cache;
#endif
uint32_t features;
uint32_t features2;
uint64_t features2;
/**
* Main ID Register value.
*/
@@ -295,14 +302,14 @@ CPUINFO_INTERNAL bool cpuinfo_arm_linux_parse_proc_cpuinfo(
#if CPUINFO_ARCH_ARM
CPUINFO_INTERNAL bool cpuinfo_arm_linux_hwcap_from_getauxval(
uint32_t hwcap[restrict static 1],
uint32_t hwcap2[restrict static 1]);
uint64_t hwcap2[restrict static 1]);
CPUINFO_INTERNAL bool cpuinfo_arm_linux_hwcap_from_procfs(
uint32_t hwcap[restrict static 1],
uint32_t hwcap2[restrict static 1]);
uint64_t hwcap2[restrict static 1]);
CPUINFO_INTERNAL void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
uint32_t features,
uint32_t features2,
uint64_t features2,
uint32_t midr,
uint32_t architecture_version,
uint32_t architecture_flags,
@@ -311,11 +318,11 @@ CPUINFO_INTERNAL void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
#elif CPUINFO_ARCH_ARM64
CPUINFO_INTERNAL void cpuinfo_arm_linux_hwcap_from_getauxval(
uint32_t hwcap[restrict static 1],
uint32_t hwcap2[restrict static 1]);
uint64_t hwcap2[restrict static 1]);
CPUINFO_INTERNAL void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
uint32_t features,
uint32_t features2,
uint64_t features2,
uint32_t midr,
const struct cpuinfo_arm_chipset chipset[restrict static 1],
struct cpuinfo_arm_isa isa[restrict static 1]);

View File

@@ -85,6 +85,7 @@ static enum cpuinfo_arm_chipset_vendor chipset_series_vendor[cpuinfo_arm_chipset
[cpuinfo_arm_chipset_series_telechips_tcc] = cpuinfo_arm_chipset_vendor_telechips,
[cpuinfo_arm_chipset_series_texas_instruments_omap] = cpuinfo_arm_chipset_vendor_texas_instruments,
[cpuinfo_arm_chipset_series_unisoc_t] = cpuinfo_arm_chipset_vendor_unisoc,
[cpuinfo_arm_chipset_series_unisoc_ums] = cpuinfo_arm_chipset_vendor_unisoc,
[cpuinfo_arm_chipset_series_wondermedia_wm] = cpuinfo_arm_chipset_vendor_wondermedia,
};
@@ -959,6 +960,70 @@ static bool match_t(const char* start, const char* end, struct cpuinfo_arm_chips
return true;
}
/**
* Tries to match, case-sensitively, /Unisoc UMS\d{3,4}/ signature for Unisoc UMS
* chipset. If match successful, extracts model information into \p chipset
* argument.
*
* @param start - start of the platform identifier (/proc/cpuinfo Hardware
* string, ro.product.board, ro.board.platform, or ro.chipname) to match.
* @param end - end of the platform identifier (/proc/cpuinfo Hardware string,
* ro.product.board, ro.board.platform, or ro.chipname) to match.
* @param[out] chipset - location where chipset information will be stored upon
* a successful match.
*
* @returns true if signature matched, false otherwise.
*/
static bool match_ums(const char* start, const char* end, struct cpuinfo_arm_chipset chipset[restrict static 1]) {
/* Expect 13-14 symbols: "Unisoc UMS" (10 symbols) + 3-4-digit model number
*/
const size_t length = end - start;
switch (length) {
case 13:
case 14:
break;
default:
return false;
}
/* Check that string starts with "Unisoc UMS". The first four characters
* are loaded as 32-bit little endian word */
const uint32_t expected_unis = load_u32le(start);
if (expected_unis != UINT32_C(0x73696E55) /* "sinU" = reverse("Unis") */) {
return false;
}
/* The next four characters are loaded as 32-bit little endian word */
const uint32_t expected_oc_u = load_u32le(start + 4);
if (expected_oc_u != UINT32_C(0x5520636F) /* "U co" = reverse("oc U") */) {
return false;
}
/* The next two characters are loaded as a 16-bit little endian word */
const uint16_t expected_ms = load_u16le(start + 8);
if (expected_ms != UINT16_C(0x534D) /* "SM" = reverse("MS") */) {
return false;
}
/* Validate and parse 3-4 digit model number */
uint32_t model = 0;
for (uint32_t i = 10; i < length; i++) {
const uint32_t digit = (uint32_t)(uint8_t)start[i] - '0';
if (digit >= 10) {
/* Not really a digit */
return false;
}
model = model * 10 + digit;
}
*chipset = (struct cpuinfo_arm_chipset){
.vendor = cpuinfo_arm_chipset_vendor_unisoc,
.series = cpuinfo_arm_chipset_series_unisoc_ums,
.model = model,
};
return true;
}
/**
* Tries to match /lc\d{4}[a-z]?$/ signature for Leadcore LC chipsets.
* If match successful, extracts model information into \p chipset argument.
@@ -2508,6 +2573,16 @@ struct cpuinfo_arm_chipset cpuinfo_arm_linux_decode_chipset_from_proc_cpuinfo_ha
return chipset;
}
/* Check Unisoc UMS signature */
if (match_ums(hardware, hardware_end, &chipset)) {
cpuinfo_log_debug(
"matched Unisoc UMS signature in /proc/cpuinfo Hardware string \"%.*s\"",
(int)hardware_length,
hardware);
return chipset;
}
#if CPUINFO_ARCH_ARM
/* Check Marvell PXA signature */
if (match_pxa(hardware, hardware_end, &chipset)) {
@@ -3726,6 +3801,7 @@ static const char* chipset_series_string[cpuinfo_arm_chipset_series_max] = {
[cpuinfo_arm_chipset_series_telechips_tcc] = "TCC",
[cpuinfo_arm_chipset_series_texas_instruments_omap] = "OMAP",
[cpuinfo_arm_chipset_series_unisoc_t] = "T",
[cpuinfo_arm_chipset_series_unisoc_ums] = "UMS",
[cpuinfo_arm_chipset_series_wondermedia_wm] = "WM",
};

View File

@@ -31,8 +31,8 @@ void cpuinfo_set_hwcap(uint32_t hwcap) {
mock_hwcap = hwcap;
}
static uint32_t mock_hwcap2 = 0;
void cpuinfo_set_hwcap2(uint32_t hwcap2) {
static uint64_t mock_hwcap2 = 0;
void cpuinfo_set_hwcap2(uint64_t hwcap2) {
mock_hwcap2 = hwcap2;
}
#endif
@@ -40,7 +40,7 @@ void cpuinfo_set_hwcap2(uint32_t hwcap2) {
#if CPUINFO_ARCH_ARM
typedef unsigned long (*getauxval_function_t)(unsigned long);
bool cpuinfo_arm_linux_hwcap_from_getauxval(uint32_t hwcap[restrict static 1], uint32_t hwcap2[restrict static 1]) {
bool cpuinfo_arm_linux_hwcap_from_getauxval(uint32_t hwcap[restrict static 1], uint64_t hwcap2[restrict static 1]) {
#if CPUINFO_MOCK
*hwcap = mock_hwcap;
*hwcap2 = mock_hwcap2;
@@ -83,13 +83,13 @@ cleanup:
}
#ifdef __ANDROID__
bool cpuinfo_arm_linux_hwcap_from_procfs(uint32_t hwcap[restrict static 1], uint32_t hwcap2[restrict static 1]) {
bool cpuinfo_arm_linux_hwcap_from_procfs(uint32_t hwcap[restrict static 1], uint64_t hwcap2[restrict static 1]) {
#if CPUINFO_MOCK
*hwcap = mock_hwcap;
*hwcap2 = mock_hwcap2;
return true;
#else
uint32_t hwcaps[2] = {0, 0};
uint64_t hwcaps[2] = {0, 0};
bool result = false;
int file = -1;
@@ -113,7 +113,7 @@ bool cpuinfo_arm_linux_hwcap_from_procfs(uint32_t hwcap[restrict static 1], uint
hwcaps[0] = (uint32_t)elf_auxv.a_un.a_val;
break;
case AT_HWCAP2:
hwcaps[1] = (uint32_t)elf_auxv.a_un.a_val;
hwcaps[1] = (uint64_t)elf_auxv.a_un.a_val;
break;
}
} else {
@@ -141,13 +141,13 @@ cleanup:
}
#endif /* __ANDROID__ */
#elif CPUINFO_ARCH_ARM64
void cpuinfo_arm_linux_hwcap_from_getauxval(uint32_t hwcap[restrict static 1], uint32_t hwcap2[restrict static 1]) {
void cpuinfo_arm_linux_hwcap_from_getauxval(uint32_t hwcap[restrict static 1], uint64_t hwcap2[restrict static 1]) {
#if CPUINFO_MOCK
*hwcap = mock_hwcap;
*hwcap2 = mock_hwcap2;
#else
*hwcap = (uint32_t)getauxval(AT_HWCAP);
*hwcap2 = (uint32_t)getauxval(AT_HWCAP2);
*hwcap2 = (uint64_t)getauxval(AT_HWCAP2);
return;
#endif
}
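
The uint32_t to uint64_t widening in this file (and in the api.h hunk above) is what makes the new SME bits representable at all: CPUINFO_ARM_LINUX_FEATURE2_SME2 is UINT64_C(0x0000002000000000), i.e. bit 37, which a 32-bit hwcap2 would silently truncate. A standalone sketch of the same check against the raw auxiliary vector on arm64 Linux (the mask value is taken from the api.h hunk; this does not go through cpuinfo):

#include <stdint.h>
#include <stdio.h>
#include <sys/auxv.h>

int main(void) {
    /* getauxval() returns unsigned long, which is 64-bit on arm64,
     * so the cast below keeps bits 32-63 instead of dropping them. */
    uint64_t hwcap2 = (uint64_t)getauxval(AT_HWCAP2);
    const uint64_t sme2_bit = UINT64_C(0x0000002000000000); /* bit 37, per the diff */
    printf("SME2 hwcap2 bit: %s\n", (hwcap2 & sme2_bit) ? "set" : "clear");
    return 0;
}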

View File

@@ -247,7 +247,8 @@ void cpuinfo_arm_linux_init(void) {
#endif
#if CPUINFO_ARCH_ARM
uint32_t isa_features = 0, isa_features2 = 0;
uint32_t isa_features = 0;
uint64_t isa_features2 = 0;
#ifdef __ANDROID__
/*
* On Android before API 20, libc.so does not provide getauxval
@@ -299,7 +300,8 @@ void cpuinfo_arm_linux_init(void) {
&chipset,
&cpuinfo_isa);
#elif CPUINFO_ARCH_ARM64
uint32_t isa_features = 0, isa_features2 = 0;
uint32_t isa_features = 0;
uint64_t isa_features2 = 0;
/* getauxval is always available on ARM64 Android */
cpuinfo_arm_linux_hwcap_from_getauxval(&isa_features, &isa_features2);
cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
@@ -333,18 +335,52 @@ void cpuinfo_arm_linux_init(void) {
}
/* Propagate topology group IDs among siblings */
bool detected_core_siblings_list_node = false;
bool detected_cluster_cpus_list_node = false;
for (uint32_t i = 0; i < arm_linux_processors_count; i++) {
if (!bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
continue;
}
if (arm_linux_processors[i].flags & CPUINFO_LINUX_FLAG_PACKAGE_ID) {
if (!bitmask_all(arm_linux_processors[i].flags, CPUINFO_LINUX_FLAG_PACKAGE_ID)) {
continue;
}
/* Use the cluster_cpus_list topology node if available. If not
* found, cache the result to avoid repeatedly attempting to
* read the non-existent paths.
* */
if (!detected_core_siblings_list_node && !detected_cluster_cpus_list_node) {
if (cpuinfo_linux_detect_cluster_cpus(
arm_linux_processors_count,
i,
(cpuinfo_siblings_callback)cluster_siblings_parser,
arm_linux_processors)) {
detected_cluster_cpus_list_node = true;
continue;
} else {
detected_core_siblings_list_node = true;
}
}
/* The cached result above will guarantee only one of the blocks
* below will execute, with a bias towards cluster_cpus_list.
**/
if (detected_core_siblings_list_node) {
cpuinfo_linux_detect_core_siblings(
arm_linux_processors_count,
i,
(cpuinfo_siblings_callback)cluster_siblings_parser,
arm_linux_processors);
}
if (detected_cluster_cpus_list_node) {
cpuinfo_linux_detect_cluster_cpus(
arm_linux_processors_count,
i,
(cpuinfo_siblings_callback)cluster_siblings_parser,
arm_linux_processors);
}
}
/* Propagate all cluster IDs */

View File

@@ -399,6 +399,16 @@ void cpuinfo_arm_mach_init(void) {
cpuinfo_isa.i8mm = true;
}
const uint32_t has_feat_sme = get_sys_info_by_name("hw.optional.arm.FEAT_SME");
if (has_feat_sme != 0) {
cpuinfo_isa.sme = true;
}
const uint32_t has_feat_sme2 = get_sys_info_by_name("hw.optional.arm.FEAT_SME2");
if (has_feat_sme2 != 0) {
cpuinfo_isa.sme2 = true;
}
uint32_t num_clusters = 1;
for (uint32_t i = 0; i < mach_topology.cores; i++) {
cores[i] = (struct cpuinfo_core){
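
The macOS path detects SME the same way cpuinfo detects its other optional ARM features: by reading a hw.optional.arm.FEAT_* sysctl through the internal get_sys_info_by_name() helper. The equivalent check outside cpuinfo, using plain sysctlbyname() (the sysctl key is taken from the diff; it is simply absent on hardware or OS versions without SME):

#include <stdio.h>
#include <stdint.h>
#include <sys/sysctl.h>

int main(void) {
    uint32_t value = 0;
    size_t size = sizeof(value);
    /* sysctlbyname() fails (and value stays 0) when the key does not exist. */
    if (sysctlbyname("hw.optional.arm.FEAT_SME", &value, &size, NULL, 0) != 0) {
        value = 0;
    }
    printf("hw.optional.arm.FEAT_SME = %u\n", value);
    return 0;
}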

View File

@@ -24,8 +24,10 @@ static char* sysctl_str(const char* name) {
size_t value_size = 0;
if (sysctlbyname(name, NULL, &value_size, NULL, 0) != 0) {
cpuinfo_log_error("sysctlbyname(\"%s\") failed: %s", name, strerror(errno));
return NULL;
} else if (value_size <= 0) {
cpuinfo_log_error("sysctlbyname(\"%s\") returned invalid value size %zu", name, value_size);
return NULL;
}
value_size += 1;
char* value = calloc(value_size, 1);
@@ -52,29 +54,22 @@ struct cpuinfo_freebsd_topology cpuinfo_freebsd_detect_topology(void) {
if (!topology_spec) {
return topology;
}
const char* group_tag = "<group level=\"1\" cache-level=\"0\">";
char* p = strstr(topology_spec, group_tag);
while (p) {
const char* cpu_tag = "cpu count=\"";
char* q = strstr(p, cpu_tag);
if (q) {
p = q + strlen(cpu_tag);
topology.packages += atoi(p);
} else {
break;
}
}
if (topology.packages == 0) {
const char* group_tag = "<group level=\"1\"";
const char* group_tags[] = {"<group level=\"2\" cache-level=\"0\">", "<group level=\"1\" "};
for (size_t i = 0; i < sizeof(group_tags) / sizeof(group_tags[0]); i++) {
const char* group_tag = group_tags[i];
char* p = strstr(topology_spec, group_tag);
while (p) {
topology.packages += 1;
p++;
p = strstr(p, group_tag);
}
if (topology.packages > 0) {
break;
}
}
if (topology.packages == 0) {
cpuinfo_log_error("failed to parse topology_spec:%s", topology_spec);
cpuinfo_log_error("failed to parse topology_spec: %s", topology_spec);
free(topology_spec);
goto fail;
}
@@ -84,6 +79,7 @@ struct cpuinfo_freebsd_topology cpuinfo_freebsd_detect_topology(void) {
goto fail;
}
if (topology.cores < topology.packages) {
cpuinfo_log_error("invalid numbers of package and core: %d %d", topology.packages, topology.cores);
goto fail;
}
topology.threads_per_core = sysctl_int("kern.smp.threads_per_core");
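
The rewritten package detection above no longer extracts cpu count="..." attributes; it simply counts occurrences of a <group ...> tag in the kern.sched.topology_spec XML, preferring the level-2 form and falling back to the level-1 prefix. The counting idiom in isolation (the sample string below is illustrative, not real sysctl output):

#include <stdio.h>
#include <string.h>

/* Count occurrences of tag in spec by advancing one character past
 * each hit, mirroring the loop used in the diff. */
static int count_tag(const char* spec, const char* tag) {
    int count = 0;
    const char* p = strstr(spec, tag);
    while (p) {
        count++;
        p = strstr(p + 1, tag);
    }
    return count;
}

int main(void) {
    const char* spec =
        "<groups><group level=\"1\" cache-level=\"3\">"
        "<cpu count=\"8\">0-7</cpu></group></groups>";
    printf("packages: %d\n", count_tag(spec, "<group level=\"1\" "));
    return 0;
}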

View File

@@ -135,6 +135,10 @@ void cpuinfo_x86_freebsd_init(void) {
if (x86_processor.cache.l1i.size != 0 || x86_processor.cache.l1d.size != 0) {
/* Assume that threads on the same core share L1 */
threads_per_l1 = freebsd_topology.threads / freebsd_topology.cores;
if (threads_per_l1 == 0) {
cpuinfo_log_error("failed to detect threads_per_l1");
goto cleanup;
}
cpuinfo_log_warning(
"freebsd kernel did not report number of "
"threads sharing L1 cache; assume %" PRIu32,
@@ -154,6 +158,10 @@ void cpuinfo_x86_freebsd_init(void) {
* the same package share L2 */
threads_per_l2 = freebsd_topology.threads / freebsd_topology.packages;
}
if (threads_per_l2 == 0) {
cpuinfo_log_error("failed to detect threads_per_l2");
goto cleanup;
}
cpuinfo_log_warning(
"freebsd kernel did not report number of "
"threads sharing L2 cache; assume %" PRIu32,
@@ -170,6 +178,10 @@ void cpuinfo_x86_freebsd_init(void) {
* may be L4 cache as well)
*/
threads_per_l3 = freebsd_topology.threads / freebsd_topology.packages;
if (threads_per_l3 == 0) {
cpuinfo_log_error("failed to detect threads_per_l3");
goto cleanup;
}
cpuinfo_log_warning(
"freebsd kernel did not report number of "
"threads sharing L3 cache; assume %" PRIu32,
@@ -187,6 +199,10 @@ void cpuinfo_x86_freebsd_init(void) {
* shared L4 (like on IBM POWER8).
*/
threads_per_l4 = freebsd_topology.threads;
if (threads_per_l4 == 0) {
cpuinfo_log_error("failed to detect threads_per_l4");
goto cleanup;
}
cpuinfo_log_warning(
"freebsd kernel did not report number of "
"threads sharing L4 cache; assume %" PRIu32,
@@ -203,7 +219,7 @@ void cpuinfo_x86_freebsd_init(void) {
"%" PRIu32 " L1I caches",
l1_count * sizeof(struct cpuinfo_cache),
l1_count);
return;
goto cleanup;
}
for (uint32_t c = 0; c < l1_count; c++) {
l1i[c] = (struct cpuinfo_cache){
@@ -230,7 +246,7 @@ void cpuinfo_x86_freebsd_init(void) {
"%" PRIu32 " L1D caches",
l1_count * sizeof(struct cpuinfo_cache),
l1_count);
return;
goto cleanup;
}
for (uint32_t c = 0; c < l1_count; c++) {
l1d[c] = (struct cpuinfo_cache){
@@ -257,7 +273,7 @@ void cpuinfo_x86_freebsd_init(void) {
"%" PRIu32 " L2 caches",
l2_count * sizeof(struct cpuinfo_cache),
l2_count);
return;
goto cleanup;
}
for (uint32_t c = 0; c < l2_count; c++) {
l2[c] = (struct cpuinfo_cache){
@@ -284,7 +300,7 @@ void cpuinfo_x86_freebsd_init(void) {
"%" PRIu32 " L3 caches",
l3_count * sizeof(struct cpuinfo_cache),
l3_count);
return;
goto cleanup;
}
for (uint32_t c = 0; c < l3_count; c++) {
l3[c] = (struct cpuinfo_cache){
@@ -311,7 +327,7 @@ void cpuinfo_x86_freebsd_init(void) {
"%" PRIu32 " L4 caches",
l4_count * sizeof(struct cpuinfo_cache),
l4_count);
return;
goto cleanup;
}
for (uint32_t c = 0; c < l4_count; c++) {
l4[c] = (struct cpuinfo_cache){

View File

@@ -1,5 +1,5 @@
/* 7zCrc.h -- CRC32 calculation
2023-04-02 : Igor Pavlov : Public domain */
2024-01-22 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_7Z_CRC_H
#define ZIP7_INC_7Z_CRC_H
@@ -20,7 +20,8 @@ void Z7_FASTCALL CrcGenerateTable(void);
UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size);
UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size);
typedef UInt32 (Z7_FASTCALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table);
typedef UInt32 (Z7_FASTCALL *Z7_CRC_UPDATE_FUNC)(UInt32 v, const void *data, size_t size);
Z7_CRC_UPDATE_FUNC z7_GetFunc_CrcUpdate(unsigned algo);
EXTERN_C_END

View File

@@ -1,5 +1,5 @@
/* 7zTypes.h -- Basic types
2023-04-02 : Igor Pavlov : Public domain */
2024-01-24 : Igor Pavlov : Public domain */
#ifndef ZIP7_7Z_TYPES_H
#define ZIP7_7Z_TYPES_H
@@ -530,20 +530,20 @@ struct ISzAlloc
#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m)
*/
#if defined (__clang__) || defined(__GNUC__)
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wcast-qual\"")
#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL \
#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL \
_Pragma("GCC diagnostic pop")
#else
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL
#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL
#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL
#endif
#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \
Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \
Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \
type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \
Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL
Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL
#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p)

View File

@@ -1,7 +1,7 @@
#define MY_VER_MAJOR 23
#define MY_VER_MINOR 01
#define MY_VER_MAJOR 24
#define MY_VER_MINOR 8
#define MY_VER_BUILD 0
#define MY_VERSION_NUMBERS "23.01"
#define MY_VERSION_NUMBERS "24.08"
#define MY_VERSION MY_VERSION_NUMBERS
#ifdef MY_CPU_NAME
@@ -10,12 +10,12 @@
#define MY_VERSION_CPU MY_VERSION
#endif
#define MY_DATE "2023-06-20"
#define MY_DATE "2024-08-11"
#undef MY_COPYRIGHT
#undef MY_VERSION_COPYRIGHT_DATE
#define MY_AUTHOR_NAME "Igor Pavlov"
#define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"
#define MY_COPYRIGHT_CR "Copyright (c) 1999-2023 Igor Pavlov"
#define MY_COPYRIGHT_CR "Copyright (c) 1999-2024 Igor Pavlov"
#ifdef USE_COPYRIGHT_CR
#define MY_COPYRIGHT MY_COPYRIGHT_CR

View File

@@ -1,5 +1,5 @@
/* Alloc.h -- Memory allocation functions
2023-03-04 : Igor Pavlov : Public domain */
2024-01-22 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_ALLOC_H
#define ZIP7_INC_ALLOC_H
@@ -22,6 +22,9 @@ void *MyAlloc(size_t size);
void MyFree(void *address);
void *MyRealloc(void *address, size_t size);
void *z7_AlignedAlloc(size_t size);
void z7_AlignedFree(void *p);
#ifdef _WIN32
#ifdef Z7_LARGE_PAGES
@@ -33,12 +36,14 @@ void MidFree(void *address);
void *BigAlloc(size_t size);
void BigFree(void *address);
/* #define Z7_BIG_ALLOC_IS_ZERO_FILLED */
#else
#define MidAlloc(size) MyAlloc(size)
#define MidFree(address) MyFree(address)
#define BigAlloc(size) MyAlloc(size)
#define BigFree(address) MyFree(address)
#define MidAlloc(size) z7_AlignedAlloc(size)
#define MidFree(address) z7_AlignedFree(address)
#define BigAlloc(size) z7_AlignedAlloc(size)
#define BigFree(address) z7_AlignedFree(address)
#endif

View File

@@ -1,5 +1,5 @@
/* Bra.h -- Branch converters for executables
2023-04-02 : Igor Pavlov : Public domain */
2024-01-20 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_BRA_H
#define ZIP7_INC_BRA_H
@@ -8,8 +8,12 @@
EXTERN_C_BEGIN
#define Z7_BRANCH_CONV_DEC(name) z7_BranchConv_ ## name ## _Dec
#define Z7_BRANCH_CONV_ENC(name) z7_BranchConv_ ## name ## _Enc
/* #define PPC BAD_PPC_11 // for debug */
#define Z7_BRANCH_CONV_DEC_2(name) z7_ ## name ## _Dec
#define Z7_BRANCH_CONV_ENC_2(name) z7_ ## name ## _Enc
#define Z7_BRANCH_CONV_DEC(name) Z7_BRANCH_CONV_DEC_2(BranchConv_ ## name)
#define Z7_BRANCH_CONV_ENC(name) Z7_BRANCH_CONV_ENC_2(BranchConv_ ## name)
#define Z7_BRANCH_CONV_ST_DEC(name) z7_BranchConvSt_ ## name ## _Dec
#define Z7_BRANCH_CONV_ST_ENC(name) z7_BranchConvSt_ ## name ## _Enc
@@ -20,19 +24,20 @@ typedef Z7_BRANCH_CONV_DECL( (*z7_Func_BranchConv));
typedef Z7_BRANCH_CONV_ST_DECL((*z7_Func_BranchConvSt));
#define Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL 0
Z7_BRANCH_CONV_ST_DECL(Z7_BRANCH_CONV_ST_DEC(X86));
Z7_BRANCH_CONV_ST_DECL(Z7_BRANCH_CONV_ST_ENC(X86));
Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_DEC(X86));
Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_ENC(X86));
#define Z7_BRANCH_FUNCS_DECL(name) \
Z7_BRANCH_CONV_DECL(Z7_BRANCH_CONV_DEC(name)); \
Z7_BRANCH_CONV_DECL(Z7_BRANCH_CONV_ENC(name));
Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_DEC_2(name)); \
Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_ENC_2(name));
Z7_BRANCH_FUNCS_DECL(ARM64)
Z7_BRANCH_FUNCS_DECL(ARM)
Z7_BRANCH_FUNCS_DECL(ARMT)
Z7_BRANCH_FUNCS_DECL(PPC)
Z7_BRANCH_FUNCS_DECL(SPARC)
Z7_BRANCH_FUNCS_DECL(IA64)
Z7_BRANCH_FUNCS_DECL (BranchConv_ARM64)
Z7_BRANCH_FUNCS_DECL (BranchConv_ARM)
Z7_BRANCH_FUNCS_DECL (BranchConv_ARMT)
Z7_BRANCH_FUNCS_DECL (BranchConv_PPC)
Z7_BRANCH_FUNCS_DECL (BranchConv_SPARC)
Z7_BRANCH_FUNCS_DECL (BranchConv_IA64)
Z7_BRANCH_FUNCS_DECL (BranchConv_RISCV)
/*
These functions convert data that contain CPU instructions.
@@ -49,14 +54,14 @@ and one for decoding (_Enc/_Dec postfixes in function name).
In params:
data : data buffer
size : size of data
pc : current virtual Program Counter (Instruction Pinter) value
pc : current virtual Program Counter (Instruction Pointer) value
In/Out param:
state : pointer to state variable (for X86 converter only)
Return:
The pointer to position in (data) buffer after last byte that was processed.
If the caller calls converter again, it must call it starting with that position.
But the caller is allowed to move data in buffer. so pointer to
But the caller is allowed to move data in buffer. So pointer to
current processed position also will be changed for next call.
Also the caller must increase internal (pc) value for next call.
@@ -65,6 +70,7 @@ Each converter has some characteristics: Endian, Alignment, LookAhead.
X86 little 1 4
ARMT little 2 2
RISCV little 2 6
ARM little 4 0
ARM64 little 4 0
PPC big 4 0
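
The converter documentation above gives everything needed to drive the state-based x86 filter: pass data, size and the virtual pc, keep the extra state word across calls, and resume from the returned pointer. A sketch under the assumption that Z7_BRANCH_CONV_ST_DECL (defined earlier in Bra.h, outside this hunk) expands to Byte *f(Byte *data, SizeT size, UInt32 pc, UInt32 *state):

#include "Bra.h"  /* pulls in 7zTypes.h for Byte, SizeT, UInt32 */

/* Decode x86 CALL/JMP branch targets in-place; pc is the virtual address
 * the buffer starts at. Returns how many bytes were fully processed. */
static SizeT x86_branch_decode(Byte *buf, SizeT size, UInt32 pc)
{
    UInt32 state = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL;
    Byte *end = z7_BranchConvSt_X86_Dec(buf, size, pc, &state);
    /* Bytes in [end, buf + size) were not processed; per the notes above they
     * must be carried into the next call, with pc advanced accordingly. */
    return (SizeT)(end - buf);
}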

View File

@@ -1,5 +1,5 @@
/* Compiler.h : Compiler specific defines and pragmas
2023-04-02 : Igor Pavlov : Public domain */
2024-01-22 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_COMPILER_H
#define ZIP7_INC_COMPILER_H
@@ -25,11 +25,79 @@
#define Z7_MINGW
#endif
#if defined(__LCC__) && (defined(__MCST__) || defined(__e2k__))
#define Z7_MCST_LCC
#define Z7_MCST_LCC_VERSION (__LCC__ * 100 + __LCC_MINOR__)
#endif
/*
#if defined(__AVX2__) \
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \
|| defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 40600) \
|| defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) \
|| defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \
|| defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400)
#define Z7_COMPILER_AVX2_SUPPORTED
#endif
#endif
*/
// #pragma GCC diagnostic ignored "-Wunknown-pragmas"
#ifdef __clang__
// padding size of '' with 4 bytes to alignment boundary
#pragma GCC diagnostic ignored "-Wpadded"
#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13) \
&& defined(__FreeBSD__)
// freebsd:
#pragma GCC diagnostic ignored "-Wexcess-padding"
#endif
#if __clang_major__ >= 16
#pragma GCC diagnostic ignored "-Wunsafe-buffer-usage"
#endif
#if __clang_major__ == 13
#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
// cheri
#pragma GCC diagnostic ignored "-Wcapability-to-integer-cast"
#endif
#endif
#if __clang_major__ == 13
// for <arm_neon.h>
#pragma GCC diagnostic ignored "-Wreserved-identifier"
#endif
#endif // __clang__
#if defined(_WIN32) && defined(__clang__) && __clang_major__ >= 16
// #pragma GCC diagnostic ignored "-Wcast-function-type-strict"
#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION \
_Pragma("GCC diagnostic ignored \"-Wcast-function-type-strict\"")
#else
#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
#endif
typedef void (*Z7_void_Function)(void);
#if defined(__clang__) || defined(__GNUC__)
#define Z7_CAST_FUNC_C (Z7_void_Function)
#elif defined(_MSC_VER) && _MSC_VER > 1920
#define Z7_CAST_FUNC_C (void *)
// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()'
#else
#define Z7_CAST_FUNC_C
#endif
/*
#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__)
// #pragma GCC diagnostic ignored "-Wcast-function-type"
#endif
*/
#ifdef __GNUC__
#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40000) && (Z7_GCC_VERSION < 70000)
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
#endif
#endif
@@ -101,7 +169,8 @@
_Pragma("clang loop unroll(disable)") \
_Pragma("clang loop vectorize(disable)")
#define Z7_ATTRIB_NO_VECTORIZE
#elif defined(__GNUC__) && (__GNUC__ >= 5)
#elif defined(__GNUC__) && (__GNUC__ >= 5) \
&& (!defined(Z7_MCST_LCC_VERSION) || (Z7_MCST_LCC_VERSION >= 12610))
#define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
// __attribute__((optimize("no-unroll-loops")));
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
@@ -142,15 +211,23 @@
#endif
#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 36000))
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30600))
#if (Z7_CLANG_VERSION < 130000)
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wreserved-id-macro\"")
#else
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"")
#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \
#endif
#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \
_Pragma("GCC diagnostic pop")
#else
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#define UNUSED_VAR(x) (void)x;

View File

@@ -1,5 +1,5 @@
/* CpuArch.h -- CPU specific code
2023-04-02 : Igor Pavlov : Public domain */
2024-06-17 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_CPU_ARCH_H
#define ZIP7_INC_CPU_ARCH_H
@@ -20,6 +20,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8)
*/
#if !defined(_M_ARM64EC)
#if defined(_M_X64) \
|| defined(_M_AMD64) \
|| defined(__x86_64__) \
@@ -35,6 +36,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif
#define MY_CPU_64BIT
#endif
#endif
#if defined(_M_IX86) \
@@ -47,17 +49,26 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#if defined(_M_ARM64) \
|| defined(_M_ARM64EC) \
|| defined(__AARCH64EL__) \
|| defined(__AARCH64EB__) \
|| defined(__aarch64__)
#define MY_CPU_ARM64
#ifdef __ILP32__
#if defined(__ILP32__) \
|| defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
#define MY_CPU_NAME "arm64-32"
#define MY_CPU_SIZEOF_POINTER 4
#else
#elif defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
#define MY_CPU_NAME "arm64-128"
#define MY_CPU_SIZEOF_POINTER 16
#else
#if defined(_M_ARM64EC)
#define MY_CPU_NAME "arm64ec"
#else
#define MY_CPU_NAME "arm64"
#endif
#define MY_CPU_SIZEOF_POINTER 8
#endif
#endif
#define MY_CPU_64BIT
#endif
@@ -133,8 +144,36 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif
#if defined(__sparc__) \
|| defined(__sparc)
#define MY_CPU_SPARC
#if defined(__LP64__) \
|| defined(_LP64) \
|| defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8)
#define MY_CPU_NAME "sparcv9"
#define MY_CPU_SIZEOF_POINTER 8
#define MY_CPU_64BIT
#elif defined(__sparc_v9__) \
|| defined(__sparcv9)
#define MY_CPU_64BIT
#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
#define MY_CPU_NAME "sparcv9-32"
#else
#define MY_CPU_NAME "sparcv9m"
#endif
#elif defined(__sparc_v8__) \
|| defined(__sparcv8)
#define MY_CPU_NAME "sparcv8"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "sparc"
#endif
#endif
#if defined(__riscv) \
|| defined(__riscv__)
#define MY_CPU_RISCV
#if __riscv_xlen == 32
#define MY_CPU_NAME "riscv32"
#elif __riscv_xlen == 64
@@ -145,6 +184,39 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif
#if defined(__loongarch__)
#define MY_CPU_LOONGARCH
#if defined(__loongarch64) || defined(__loongarch_grlen) && (__loongarch_grlen == 64)
#define MY_CPU_64BIT
#endif
#if defined(__loongarch64)
#define MY_CPU_NAME "loongarch64"
#define MY_CPU_LOONGARCH64
#else
#define MY_CPU_NAME "loongarch"
#endif
#endif
// #undef MY_CPU_NAME
// #undef MY_CPU_SIZEOF_POINTER
// #define __e2k__
// #define __SIZEOF_POINTER__ 4
#if defined(__e2k__)
#define MY_CPU_E2K
#if defined(__ILP32__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
#define MY_CPU_NAME "e2k-32"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "e2k"
#if defined(__LP64__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8)
#define MY_CPU_SIZEOF_POINTER 8
#endif
#endif
#define MY_CPU_64BIT
#endif
#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64)
#define MY_CPU_X86_OR_AMD64
#endif
@@ -175,6 +247,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|| defined(MY_CPU_ARM_LE) \
|| defined(MY_CPU_ARM64_LE) \
|| defined(MY_CPU_IA64_LE) \
|| defined(_LITTLE_ENDIAN) \
|| defined(__LITTLE_ENDIAN__) \
|| defined(__ARMEL__) \
|| defined(__THUMBEL__) \
@@ -251,6 +324,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#ifndef MY_CPU_NAME
// #define MY_CPU_IS_UNKNOWN
#ifdef MY_CPU_LE
#define MY_CPU_NAME "LE"
#elif defined(MY_CPU_BE)
@@ -295,9 +369,19 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#define Z7_BSWAP64(v) _byteswap_uint64(v)
#define Z7_CPU_FAST_BSWAP_SUPPORTED
#elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(__clang__) && Z7_has_builtin(__builtin_bswap16))
/* GCC can generate slow code that calls function for __builtin_bswap32() for:
- GCC for RISCV, if Zbb/XTHeadBb extension is not used.
- GCC for SPARC.
The code from CLANG for SPARC also is not fastest.
So we don't define Z7_CPU_FAST_BSWAP_SUPPORTED in some cases.
*/
#elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb) || defined(__riscv_xtheadbb)) \
&& !defined(MY_CPU_SPARC) \
&& ( \
(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) \
)
#define Z7_BSWAP16(v) __builtin_bswap16(v)
#define Z7_BSWAP32(v) __builtin_bswap32(v)
#define Z7_BSWAP64(v) __builtin_bswap64(v)
@@ -329,13 +413,48 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#ifdef MY_CPU_LE
#if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM64)
|| defined(MY_CPU_ARM64) \
|| defined(MY_CPU_RISCV) && defined(__riscv_misaligned_fast) \
|| defined(MY_CPU_E2K) && defined(__iset__) && (__iset__ >= 6)
#define MY_CPU_LE_UNALIGN
#define MY_CPU_LE_UNALIGN_64
#elif defined(__ARM_FEATURE_UNALIGNED)
/* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment.
So we can't use unaligned 64-bit operations. */
#define MY_CPU_LE_UNALIGN
/* === ALIGNMENT on 32-bit arm and LDRD/STRD/LDM/STM instructions.
Description of problems:
problem-1 : 32-bit ARM architecture:
multi-access (pair of 32-bit accesses) instructions (LDRD/STRD/LDM/STM)
require 32-bit (WORD) alignment (by 32-bit ARM architecture).
So there is "Alignment fault exception", if data is not aligned for 32-bit.
problem-2 : 32-bit kernels and arm64 kernels:
32-bit linux kernels provide fixup for these "paired" instruction "Alignment fault exception".
So unaligned paired-access instructions work via exception handler in kernel in 32-bit linux.
But some arm64 kernels do not handle these faults in 32-bit programs.
So we have unhandled exception for such instructions.
Probably some new arm64 kernels have fixed it, and unaligned
paired-access instructions work in new kernels?
problem-3 : compiler for 32-bit arm:
Compilers use LDRD/STRD/LDM/STM for UInt64 accesses
and for another cases where two 32-bit accesses are fused
to one multi-access instruction.
So UInt64 variables must be aligned for 32-bit, and each
32-bit access must be aligned for 32-bit, if we want to
avoid "Alignment fault" exception (handled or unhandled).
problem-4 : performance:
Even if unaligned access is handled by kernel, it will be slow.
So if we allow unaligned access, we can get fast unaligned
single-access, and slow unaligned paired-access.
We don't allow unaligned access on 32-bit arm, because compiler
generates paired-access instructions that require 32-bit alignment,
and some arm64 kernels have no handler for these instructions.
Also unaligned paired-access instructions will be slow, if kernel handles them.
*/
// it must be disabled:
// #define MY_CPU_LE_UNALIGN
#endif
#endif
@@ -439,11 +558,13 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#if defined(MY_CPU_BE)
#define GetBe64a(p) (*(const UInt64 *)(const void *)(p))
#define GetBe32a(p) (*(const UInt32 *)(const void *)(p))
#define GetBe16a(p) (*(const UInt16 *)(const void *)(p))
#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define GetUi64a(p) GetUi64(p)
#define GetUi32a(p) GetUi32(p)
#define GetUi16a(p) GetUi16(p)
#define SetUi32a(p, v) SetUi32(p, v)
@@ -451,11 +572,13 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#elif defined(MY_CPU_LE)
#define GetUi64a(p) (*(const UInt64 *)(const void *)(p))
#define GetUi32a(p) (*(const UInt32 *)(const void *)(p))
#define GetUi16a(p) (*(const UInt16 *)(const void *)(p))
#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
#define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define GetBe64a(p) GetBe64(p)
#define GetBe32a(p) GetBe32(p)
#define GetBe16a(p) GetBe16(p)
#define SetBe32a(p, v) SetBe32(p, v)
@@ -486,6 +609,7 @@ UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void);
BoolInt CPU_IsSupported_AES(void);
BoolInt CPU_IsSupported_AVX(void);
BoolInt CPU_IsSupported_AVX2(void);
BoolInt CPU_IsSupported_AVX512F_AVX512VL(void);
BoolInt CPU_IsSupported_VAES_AVX2(void);
BoolInt CPU_IsSupported_CMOV(void);
BoolInt CPU_IsSupported_SSE(void);

View File

@@ -1,5 +1,5 @@
/* LzFind.h -- Match finder for LZ algorithms
2023-03-04 : Igor Pavlov : Public domain */
2024-01-22 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_LZ_FIND_H
#define ZIP7_INC_LZ_FIND_H
@@ -144,7 +144,8 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable);
void MatchFinder_Init_LowHash(CMatchFinder *p);
void MatchFinder_Init_HighHash(CMatchFinder *p);
void MatchFinder_Init_4(CMatchFinder *p);
void MatchFinder_Init(CMatchFinder *p);
// void MatchFinder_Init(CMatchFinder *p);
void MatchFinder_Init(void *p);
UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);

View File

@@ -1,5 +1,5 @@
/* LzFindMt.h -- multithreaded Match finder for LZ algorithms
2023-03-05 : Igor Pavlov : Public domain */
2024-01-22 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_LZ_FIND_MT_H
#define ZIP7_INC_LZ_FIND_MT_H
@@ -31,7 +31,10 @@ typedef struct
// UInt32 numBlocks_Sent;
} CMtSync;
typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances);
struct CMatchFinderMt_;
typedef UInt32 * (*Mf_Mix_Matches)(struct CMatchFinderMt_ *p, UInt32 matchMinPos, UInt32 *distances);
/* kMtCacheLineDummy must be >= size_of_CPU_cache_line */
#define kMtCacheLineDummy 128
@@ -39,7 +42,7 @@ typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distance
typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos,
UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc);
typedef struct
typedef struct CMatchFinderMt_
{
/* LZ */
const Byte *pointerToCurPos;

View File

@@ -1,10 +1,127 @@
/* Precomp.h -- StdAfx
2023-04-02 : Igor Pavlov : Public domain */
/* Precomp.h -- precompilation file
2024-01-25 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_PRECOMP_H
#define ZIP7_INC_PRECOMP_H
/*
this file must be included before another *.h files and before <windows.h>.
this file is included from the following files:
C\*.c
C\Util\*\Precomp.h <- C\Util\*\*.c
CPP\Common\Common.h <- *\StdAfx.h <- *\*.cpp
this file can set the following macros:
Z7_LARGE_PAGES 1
Z7_LONG_PATH 1
Z7_WIN32_WINNT_MIN 0x0500 (or higher) : we require at least win2000+ for 7-Zip
_WIN32_WINNT 0x0500 (or higher)
WINVER _WIN32_WINNT
UNICODE 1
_UNICODE 1
*/
#include "Compiler.h"
/* #include "7zTypes.h" */
#ifdef _MSC_VER
// #pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty
#if _MSC_VER >= 1912
// #pragma warning(disable : 5039) // pointer or reference to potentially throwing function passed to 'extern "C"' function under - EHc.Undefined behavior may occur if this function throws an exception.
#endif
#endif
/*
// for debug:
#define UNICODE 1
#define _UNICODE 1
#define _WIN32_WINNT 0x0500 // win2000
#ifndef WINVER
#define WINVER _WIN32_WINNT
#endif
*/
#ifdef _WIN32
/*
this "Precomp.h" file must be included before <windows.h>,
if we want to define _WIN32_WINNT before <windows.h>.
*/
#ifndef Z7_LARGE_PAGES
#ifndef Z7_NO_LARGE_PAGES
#define Z7_LARGE_PAGES 1
#endif
#endif
#ifndef Z7_LONG_PATH
#ifndef Z7_NO_LONG_PATH
#define Z7_LONG_PATH 1
#endif
#endif
#ifndef Z7_DEVICE_FILE
#ifndef Z7_NO_DEVICE_FILE
// #define Z7_DEVICE_FILE 1
#endif
#endif
// we don't change macros if included after <windows.h>
#ifndef _WINDOWS_
#ifndef Z7_WIN32_WINNT_MIN
#if defined(_M_ARM64) || defined(__aarch64__)
// #define Z7_WIN32_WINNT_MIN 0x0a00 // win10
#define Z7_WIN32_WINNT_MIN 0x0600 // vista
#elif defined(_M_ARM) && defined(_M_ARMT) && defined(_M_ARM_NT)
// #define Z7_WIN32_WINNT_MIN 0x0602 // win8
#define Z7_WIN32_WINNT_MIN 0x0600 // vista
#elif defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) || defined(_M_IA64)
#define Z7_WIN32_WINNT_MIN 0x0503 // win2003
// #elif defined(_M_IX86) || defined(__i386__)
// #define Z7_WIN32_WINNT_MIN 0x0500 // win2000
#else // x86 and another(old) systems
#define Z7_WIN32_WINNT_MIN 0x0500 // win2000
// #define Z7_WIN32_WINNT_MIN 0x0502 // win2003 // for debug
#endif
#endif // Z7_WIN32_WINNT_MIN
#ifndef Z7_DO_NOT_DEFINE_WIN32_WINNT
#ifdef _WIN32_WINNT
// #error Stop_Compiling_Bad_WIN32_WINNT
#else
#ifndef Z7_NO_DEFINE_WIN32_WINNT
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define _WIN32_WINNT Z7_WIN32_WINNT_MIN
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif // _WIN32_WINNT
#ifndef WINVER
#define WINVER _WIN32_WINNT
#endif
#endif // Z7_DO_NOT_DEFINE_WIN32_WINNT
#ifndef _MBCS
#ifndef Z7_NO_UNICODE
// UNICODE and _UNICODE are used by <windows.h> and by 7-zip code.
#ifndef UNICODE
#define UNICODE 1
#endif
#ifndef _UNICODE
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define _UNICODE 1
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif // Z7_NO_UNICODE
#endif // _MBCS
#endif // _WINDOWS_
// #include "7zWindows.h"
#endif // _WIN32
#endif


@@ -1,5 +1,5 @@
/* Threads.h -- multithreading library
2023-04-02 : Igor Pavlov : Public domain */
2024-03-28 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_THREADS_H
#define ZIP7_INC_THREADS_H
@@ -9,12 +9,21 @@
#else
#include "Compiler.h"
// #define Z7_AFFINITY_DISABLE
#if defined(__linux__)
#if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__)
#ifndef Z7_AFFINITY_DISABLE
#define Z7_AFFINITY_SUPPORTED
// #pragma message(" ==== Z7_AFFINITY_SUPPORTED")
// #define _GNU_SOURCE
#if !defined(_GNU_SOURCE)
// #pragma message(" ==== _GNU_SOURCE set")
// we need _GNU_SOURCE for cpu_set_t, if we compile for MUSL
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define _GNU_SOURCE
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif
#endif
#endif
@@ -173,7 +182,7 @@ WRes CriticalSection_Init(CCriticalSection *p);
#else // _WIN32
typedef struct _CEvent
typedef struct
{
int _created;
int _manual_reset;
@@ -199,7 +208,7 @@ WRes Event_Wait(CEvent *p);
WRes Event_Close(CEvent *p);
typedef struct _CSemaphore
typedef struct
{
int _created;
UInt32 _count;
@@ -219,7 +228,7 @@ WRes Semaphore_Wait(CSemaphore *p);
WRes Semaphore_Close(CSemaphore *p);
typedef struct _CCriticalSection
typedef struct
{
pthread_mutex_t _mutex;
} CCriticalSection;
@@ -230,6 +239,7 @@ void CriticalSection_Enter(CCriticalSection *cs);
void CriticalSection_Leave(CCriticalSection *cs);
LONG InterlockedIncrement(LONG volatile *addend);
LONG InterlockedDecrement(LONG volatile *addend);
#endif // _WIN32


@@ -1,5 +1,5 @@
/* Xz.h - Xz interface
2023-04-13 : Igor Pavlov : Public domain */
2024-01-26 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_XZ_H
#define ZIP7_INC_XZ_H
@@ -18,6 +18,7 @@ EXTERN_C_BEGIN
#define XZ_ID_ARMT 8
#define XZ_ID_SPARC 9
#define XZ_ID_ARM64 0xa
#define XZ_ID_RISCV 0xb
#define XZ_ID_LZMA2 0x21
unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value);
@@ -233,13 +234,13 @@ typedef enum
typedef struct
{
EXzState state;
UInt32 pos;
unsigned pos;
unsigned alignPos;
unsigned indexPreSize;
CXzStreamFlags streamFlags;
UInt32 blockHeaderSize;
unsigned blockHeaderSize;
UInt64 packSize;
UInt64 unpackSize;
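For orientation: Xz_ReadVarInt above decodes the multibyte integers used throughout the .xz container, where each byte carries 7 payload bits (least significant group first) and a set high bit means another byte follows, up to 9 bytes for a 63-bit value. A hedged standalone sketch of such a decoder; the name is illustrative and the upstream function's exact limits and error handling may differ:
#include <stddef.h>
#include <stdint.h>
/* Decode an xz-style variable-length integer: 7 bits per byte, LSB group first,
   high bit set = continuation. Returns bytes consumed, or 0 on error. */
static size_t read_varint_sketch(const uint8_t *p, size_t max_size, uint64_t *value)
{
  uint64_t v = 0;
  size_t i;
  if (max_size > 9)
    max_size = 9;                    /* 9 * 7 = 63 bits is the format's ceiling */
  for (i = 0; i < max_size; i++)
  {
    v |= (uint64_t)(p[i] & 0x7F) << (7 * i);
    if ((p[i] & 0x80) == 0)
    {
      *value = v;
      return i + 1;
    }
  }
  return 0;                          /* truncated or over-long encoding */
}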


@@ -1,5 +1,5 @@
/* XzCrc64.h -- CRC64 calculation
2023-04-02 : Igor Pavlov : Public domain */
2023-12-08 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_XZ_CRC64_H
#define ZIP7_INC_XZ_CRC64_H
@@ -10,16 +10,16 @@
EXTERN_C_BEGIN
extern UInt64 g_Crc64Table[];
// extern UInt64 g_Crc64Table[];
void Z7_FASTCALL Crc64GenerateTable(void);
#define CRC64_INIT_VAL UINT64_CONST(0xFFFFFFFFFFFFFFFF)
#define CRC64_GET_DIGEST(crc) ((crc) ^ CRC64_INIT_VAL)
#define CRC64_UPDATE_BYTE(crc, b) (g_Crc64Table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
// #define CRC64_UPDATE_BYTE(crc, b) (g_Crc64Table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt64 Z7_FASTCALL Crc64Update(UInt64 crc, const void *data, size_t size);
UInt64 Z7_FASTCALL Crc64Calc(const void *data, size_t size);
// UInt64 Z7_FASTCALL Crc64Calc(const void *data, size_t size);
EXTERN_C_END


@@ -1,5 +1,5 @@
/* 7zArcIn.c -- 7z Input functions
2023-05-11 : Igor Pavlov : Public domain */
2023-09-07 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -301,7 +301,7 @@ static SRes RememberBitVector(CSzData *sd, UInt32 numItems, const Byte **v)
static UInt32 CountDefinedBits(const Byte *bits, UInt32 numItems)
{
Byte b = 0;
unsigned b = 0;
unsigned m = 0;
UInt32 sum = 0;
for (; numItems != 0; numItems--)
@@ -312,7 +312,7 @@ static UInt32 CountDefinedBits(const Byte *bits, UInt32 numItems)
m = 8;
}
m--;
sum += ((b >> m) & 1);
sum += (UInt32)((b >> m) & 1);
}
return sum;
}


@@ -1,93 +1,96 @@
/* 7zCrc.c -- CRC32 calculation and init
2023-04-02 : Igor Pavlov : Public domain */
2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "7zCrc.h"
#include "CpuArch.h"
#define kCrcPoly 0xEDB88320
// for debug:
// #define __ARM_FEATURE_CRC32 1
#ifdef MY_CPU_LE
#define CRC_NUM_TABLES 8
#ifdef __ARM_FEATURE_CRC32
// #pragma message("__ARM_FEATURE_CRC32")
#define Z7_CRC_HW_FORCE
#endif
// #define Z7_CRC_DEBUG_BE
#ifdef Z7_CRC_DEBUG_BE
#undef MY_CPU_LE
#define MY_CPU_BE
#endif
#ifdef Z7_CRC_HW_FORCE
#define Z7_CRC_NUM_TABLES_USE 1
#else
#define CRC_NUM_TABLES 9
UInt32 Z7_FASTCALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 Z7_FASTCALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
#ifdef Z7_CRC_NUM_TABLES
#define Z7_CRC_NUM_TABLES_USE Z7_CRC_NUM_TABLES
#else
#define Z7_CRC_NUM_TABLES_USE 12
#endif
#endif
#ifndef MY_CPU_BE
UInt32 Z7_FASTCALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 Z7_FASTCALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
#if Z7_CRC_NUM_TABLES_USE < 1
#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
#endif
/*
extern
CRC_FUNC g_CrcUpdateT4;
CRC_FUNC g_CrcUpdateT4;
*/
extern
CRC_FUNC g_CrcUpdateT8;
CRC_FUNC g_CrcUpdateT8;
extern
CRC_FUNC g_CrcUpdateT0_32;
CRC_FUNC g_CrcUpdateT0_32;
extern
CRC_FUNC g_CrcUpdateT0_64;
CRC_FUNC g_CrcUpdateT0_64;
extern
CRC_FUNC g_CrcUpdate;
CRC_FUNC g_CrcUpdate;
#if defined(MY_CPU_LE) || (Z7_CRC_NUM_TABLES_USE == 1)
#define Z7_CRC_NUM_TABLES_TOTAL Z7_CRC_NUM_TABLES_USE
#else
#define Z7_CRC_NUM_TABLES_TOTAL (Z7_CRC_NUM_TABLES_USE + 1)
#endif
UInt32 g_CrcTable[256 * CRC_NUM_TABLES];
#ifndef Z7_CRC_HW_FORCE
UInt32 Z7_FASTCALL CrcUpdate(UInt32 v, const void *data, size_t size)
{
return g_CrcUpdate(v, data, size, g_CrcTable);
}
UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size)
{
return g_CrcUpdate(CRC_INIT_VAL, data, size, g_CrcTable) ^ CRC_INIT_VAL;
}
#if CRC_NUM_TABLES < 4 \
|| (CRC_NUM_TABLES == 4 && defined(MY_CPU_BE)) \
#if Z7_CRC_NUM_TABLES_USE == 1 \
|| (!defined(MY_CPU_LE) && !defined(MY_CPU_BE))
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt32 Z7_FASTCALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 Z7_FASTCALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table)
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
#define Z7_CRC_UPDATE_T1_FUNC_NAME CrcUpdateGT1
static UInt32 Z7_FASTCALL Z7_CRC_UPDATE_T1_FUNC_NAME(UInt32 v, const void *data, size_t size)
{
const UInt32 *table = g_CrcTable;
const Byte *p = (const Byte *)data;
const Byte *pEnd = p + size;
for (; p != pEnd; p++)
const Byte *lim = p + size;
for (; p != lim; p++)
v = CRC_UPDATE_BYTE_2(v, *p);
return v;
}
#endif
#if Z7_CRC_NUM_TABLES_USE != 1
#ifndef MY_CPU_BE
#define FUNC_NAME_LE_2(s) CrcUpdateT ## s
#define FUNC_NAME_LE_1(s) FUNC_NAME_LE_2(s)
#define FUNC_NAME_LE FUNC_NAME_LE_1(Z7_CRC_NUM_TABLES_USE)
UInt32 Z7_FASTCALL FUNC_NAME_LE (UInt32 v, const void *data, size_t size, const UInt32 *table);
#endif
#ifndef MY_CPU_LE
#define FUNC_NAME_BE_2(s) CrcUpdateT1_BeT ## s
#define FUNC_NAME_BE_1(s) FUNC_NAME_BE_2(s)
#define FUNC_NAME_BE FUNC_NAME_BE_1(Z7_CRC_NUM_TABLES_USE)
UInt32 Z7_FASTCALL FUNC_NAME_BE (UInt32 v, const void *data, size_t size, const UInt32 *table);
#endif
#endif
#endif // Z7_CRC_HW_FORCE
/* ---------- hardware CRC ---------- */
#ifdef MY_CPU_LE
#if defined(MY_CPU_ARM_OR_ARM64)
// #pragma message("ARM*")
#if defined(_MSC_VER) && !defined(__clang__)
#if defined(MY_CPU_ARM64)
#if (_MSC_VER >= 1910)
#ifndef __clang__
#define USE_ARM64_CRC
#include <intrin.h>
#endif
#endif
#endif
#elif (defined(__clang__) && (__clang_major__ >= 3)) \
|| (defined(__GNUC__) && (__GNUC__ > 4))
#if (defined(__clang__) && (__clang_major__ >= 3)) \
|| defined(__GNUC__) && (__GNUC__ >= 6) && defined(MY_CPU_ARM64) \
|| defined(__GNUC__) && (__GNUC__ >= 8)
#if !defined(__ARM_FEATURE_CRC32)
// #pragma message("!defined(__ARM_FEATURE_CRC32)")
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define __ARM_FEATURE_CRC32 1
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#define Z7_ARM_FEATURE_CRC32_WAS_SET
#if defined(__clang__)
#if defined(MY_CPU_ARM64)
#define ATTRIB_CRC __attribute__((__target__("crc")))
@@ -96,100 +99,120 @@ UInt32 Z7_FASTCALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UI
#endif
#else
#if defined(MY_CPU_ARM64)
#if !defined(Z7_GCC_VERSION) || (Z7_GCC_VERSION >= 60000)
#define ATTRIB_CRC __attribute__((__target__("+crc")))
#endif
#else
#if !defined(Z7_GCC_VERSION) || (__GNUC__ >= 8)
#if defined(__ARM_FP) && __GNUC__ >= 8
// for -mfloat-abi=hard: similar to <arm_acle.h>
#define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc+simd")))
#else
#define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc")))
#endif
#endif
#endif
#endif
#endif
#if defined(__ARM_FEATURE_CRC32)
#define USE_ARM64_CRC
// #pragma message("<arm_acle.h>")
/*
arm_acle.h (GGC):
before Nov 17, 2017:
#ifdef __ARM_FEATURE_CRC32
Nov 17, 2017: gcc10.0 (gcc 9.2.0) checked"
#if __ARM_ARCH >= 8
#pragma GCC target ("arch=armv8-a+crc")
Aug 22, 2019: GCC 8.4?, 9.2.1, 10.1:
#ifdef __ARM_FEATURE_CRC32
#ifdef __ARM_FP
#pragma GCC target ("arch=armv8-a+crc+simd")
#else
#pragma GCC target ("arch=armv8-a+crc")
#endif
*/
#if defined(__ARM_ARCH) && __ARM_ARCH < 8
#if defined(Z7_GCC_VERSION) && (__GNUC__ == 8) && (Z7_GCC_VERSION < 80400) \
|| defined(Z7_GCC_VERSION) && (__GNUC__ == 9) && (Z7_GCC_VERSION < 90201) \
|| defined(Z7_GCC_VERSION) && (__GNUC__ == 10) && (Z7_GCC_VERSION < 100100)
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
// #pragma message("#define __ARM_ARCH 8")
#undef __ARM_ARCH
#define __ARM_ARCH 8
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif
#define Z7_CRC_HW_USE
#include <arm_acle.h>
#endif
#elif defined(_MSC_VER)
#if defined(MY_CPU_ARM64)
#if (_MSC_VER >= 1910)
#ifdef __clang__
// #define Z7_CRC_HW_USE
// #include <arm_acle.h>
#else
#define Z7_CRC_HW_USE
#include <intrin.h>
#endif
#endif
#endif
#endif
#else
#else // non-ARM*
// no hardware CRC
// #define USE_CRC_EMU
#ifdef USE_CRC_EMU
#pragma message("ARM64 CRC emulation")
Z7_FORCE_INLINE
UInt32 __crc32b(UInt32 v, UInt32 data)
{
const UInt32 *table = g_CrcTable;
v = CRC_UPDATE_BYTE_2(v, (Byte)data);
return v;
}
Z7_FORCE_INLINE
UInt32 __crc32w(UInt32 v, UInt32 data)
{
const UInt32 *table = g_CrcTable;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
return v;
}
Z7_FORCE_INLINE
UInt32 __crc32d(UInt32 v, UInt64 data)
{
const UInt32 *table = g_CrcTable;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
return v;
}
#endif // USE_CRC_EMU
#endif // defined(MY_CPU_ARM64) && defined(MY_CPU_LE)
#if defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
#define T0_32_UNROLL_BYTES (4 * 4)
#define T0_64_UNROLL_BYTES (4 * 8)
#ifndef ATTRIB_CRC
#define ATTRIB_CRC
// #define Z7_CRC_HW_USE // for debug : we can test HW-branch of code
#ifdef Z7_CRC_HW_USE
#include "7zCrcEmu.h"
#endif
#endif // non-ARM*
#if defined(Z7_CRC_HW_USE)
// #pragma message("USE ARM HW CRC")
ATTRIB_CRC
UInt32 Z7_FASTCALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table);
ATTRIB_CRC
UInt32 Z7_FASTCALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table)
#ifdef MY_CPU_64BIT
#define CRC_HW_WORD_TYPE UInt64
#define CRC_HW_WORD_FUNC __crc32d
#else
#define CRC_HW_WORD_TYPE UInt32
#define CRC_HW_WORD_FUNC __crc32w
#endif
#define CRC_HW_UNROLL_BYTES (sizeof(CRC_HW_WORD_TYPE) * 4)
#ifdef ATTRIB_CRC
ATTRIB_CRC
#endif
Z7_NO_INLINE
#ifdef Z7_CRC_HW_FORCE
UInt32 Z7_FASTCALL CrcUpdate
#else
static UInt32 Z7_FASTCALL CrcUpdate_HW
#endif
(UInt32 v, const void *data, size_t size)
{
const Byte *p = (const Byte *)data;
UNUSED_VAR(table);
for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_32_UNROLL_BYTES - 1)) != 0; size--)
for (; size != 0 && ((unsigned)(ptrdiff_t)p & (CRC_HW_UNROLL_BYTES - 1)) != 0; size--)
v = __crc32b(v, *p++);
if (size >= T0_32_UNROLL_BYTES)
if (size >= CRC_HW_UNROLL_BYTES)
{
const Byte *lim = p + size;
size &= (T0_32_UNROLL_BYTES - 1);
size &= CRC_HW_UNROLL_BYTES - 1;
lim -= size;
do
{
v = __crc32w(v, *(const UInt32 *)(const void *)(p));
v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4;
v = __crc32w(v, *(const UInt32 *)(const void *)(p));
v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4;
v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p));
v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p + sizeof(CRC_HW_WORD_TYPE)));
p += 2 * sizeof(CRC_HW_WORD_TYPE);
v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p));
v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p + sizeof(CRC_HW_WORD_TYPE)));
p += 2 * sizeof(CRC_HW_WORD_TYPE);
}
while (p != lim);
}
@@ -200,141 +223,198 @@ UInt32 Z7_FASTCALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const
return v;
}
ATTRIB_CRC
UInt32 Z7_FASTCALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table);
ATTRIB_CRC
UInt32 Z7_FASTCALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
UNUSED_VAR(table);
for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_64_UNROLL_BYTES - 1)) != 0; size--)
v = __crc32b(v, *p++);
if (size >= T0_64_UNROLL_BYTES)
{
const Byte *lim = p + size;
size &= (T0_64_UNROLL_BYTES - 1);
lim -= size;
do
{
v = __crc32d(v, *(const UInt64 *)(const void *)(p));
v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
v = __crc32d(v, *(const UInt64 *)(const void *)(p));
v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
}
while (p != lim);
}
for (; size != 0; size--)
v = __crc32b(v, *p++);
return v;
}
#undef T0_32_UNROLL_BYTES
#undef T0_64_UNROLL_BYTES
#endif // defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
#ifdef Z7_ARM_FEATURE_CRC32_WAS_SET
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#undef __ARM_FEATURE_CRC32
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#undef Z7_ARM_FEATURE_CRC32_WAS_SET
#endif
#endif // defined(Z7_CRC_HW_USE)
#endif // MY_CPU_LE
#ifndef Z7_CRC_HW_FORCE
#if defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME)
/*
typedef UInt32 (Z7_FASTCALL *Z7_CRC_UPDATE_WITH_TABLE_FUNC)
(UInt32 v, const void *data, size_t size, const UInt32 *table);
Z7_CRC_UPDATE_WITH_TABLE_FUNC g_CrcUpdate;
*/
static unsigned g_Crc_Algo;
#if (!defined(MY_CPU_LE) && !defined(MY_CPU_BE))
static unsigned g_Crc_Be;
#endif
#endif // defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME)
Z7_NO_INLINE
#ifdef Z7_CRC_HW_USE
static UInt32 Z7_FASTCALL CrcUpdate_Base
#else
UInt32 Z7_FASTCALL CrcUpdate
#endif
(UInt32 crc, const void *data, size_t size)
{
#if Z7_CRC_NUM_TABLES_USE == 1
return Z7_CRC_UPDATE_T1_FUNC_NAME(crc, data, size);
#else // Z7_CRC_NUM_TABLES_USE != 1
#ifdef Z7_CRC_UPDATE_T1_FUNC_NAME
if (g_Crc_Algo == 1)
return Z7_CRC_UPDATE_T1_FUNC_NAME(crc, data, size);
#endif
#ifdef MY_CPU_LE
return FUNC_NAME_LE(crc, data, size, g_CrcTable);
#elif defined(MY_CPU_BE)
return FUNC_NAME_BE(crc, data, size, g_CrcTable);
#else
if (g_Crc_Be)
return FUNC_NAME_BE(crc, data, size, g_CrcTable);
else
return FUNC_NAME_LE(crc, data, size, g_CrcTable);
#endif
#endif // Z7_CRC_NUM_TABLES_USE != 1
}
#ifdef Z7_CRC_HW_USE
Z7_NO_INLINE
UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size)
{
if (g_Crc_Algo == 0)
return CrcUpdate_HW(crc, data, size);
return CrcUpdate_Base(crc, data, size);
}
#endif
#endif // !defined(Z7_CRC_HW_FORCE)
UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size)
{
return CrcUpdate(CRC_INIT_VAL, data, size) ^ CRC_INIT_VAL;
}
MY_ALIGN(64)
UInt32 g_CrcTable[256 * Z7_CRC_NUM_TABLES_TOTAL];
void Z7_FASTCALL CrcGenerateTable(void)
{
UInt32 i;
for (i = 0; i < 256; i++)
{
#if defined(Z7_CRC_HW_FORCE)
g_CrcTable[i] = __crc32b(i, 0);
#else
#define kCrcPoly 0xEDB88320
UInt32 r = i;
unsigned j;
for (j = 0; j < 8; j++)
r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
g_CrcTable[i] = r;
#endif
}
for (i = 256; i < 256 * CRC_NUM_TABLES; i++)
for (i = 256; i < 256 * Z7_CRC_NUM_TABLES_USE; i++)
{
const UInt32 r = g_CrcTable[(size_t)i - 256];
g_CrcTable[i] = g_CrcTable[r & 0xFF] ^ (r >> 8);
}
#if CRC_NUM_TABLES < 4
g_CrcUpdate = CrcUpdateT1;
#elif defined(MY_CPU_LE)
// g_CrcUpdateT4 = CrcUpdateT4;
#if CRC_NUM_TABLES < 8
g_CrcUpdate = CrcUpdateT4;
#else // CRC_NUM_TABLES >= 8
g_CrcUpdateT8 = CrcUpdateT8;
/*
#ifdef MY_CPU_X86_OR_AMD64
if (!CPU_Is_InOrder())
#endif
*/
g_CrcUpdate = CrcUpdateT8;
#endif
#else
#if !defined(Z7_CRC_HW_FORCE) && \
(defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME) || defined(MY_CPU_BE))
#if Z7_CRC_NUM_TABLES_USE <= 1
g_Crc_Algo = 1;
#else // Z7_CRC_NUM_TABLES_USE <= 1
#if defined(MY_CPU_LE)
g_Crc_Algo = Z7_CRC_NUM_TABLES_USE;
#else // !defined(MY_CPU_LE)
{
#ifndef MY_CPU_BE
#ifndef MY_CPU_BE
UInt32 k = 0x01020304;
const Byte *p = (const Byte *)&k;
if (p[0] == 4 && p[1] == 3)
{
#if CRC_NUM_TABLES < 8
// g_CrcUpdateT4 = CrcUpdateT4;
g_CrcUpdate = CrcUpdateT4;
#else // CRC_NUM_TABLES >= 8
g_CrcUpdateT8 = CrcUpdateT8;
g_CrcUpdate = CrcUpdateT8;
#endif
}
g_Crc_Algo = Z7_CRC_NUM_TABLES_USE;
else if (p[0] != 1 || p[1] != 2)
g_CrcUpdate = CrcUpdateT1;
g_Crc_Algo = 1;
else
#endif // MY_CPU_BE
#endif // MY_CPU_BE
{
for (i = 256 * CRC_NUM_TABLES - 1; i >= 256; i--)
for (i = 256 * Z7_CRC_NUM_TABLES_TOTAL - 1; i >= 256; i--)
{
const UInt32 x = g_CrcTable[(size_t)i - 256];
g_CrcTable[i] = Z7_BSWAP32(x);
}
#if CRC_NUM_TABLES <= 4
g_CrcUpdate = CrcUpdateT1;
#elif CRC_NUM_TABLES <= 8
// g_CrcUpdateT4 = CrcUpdateT1_BeT4;
g_CrcUpdate = CrcUpdateT1_BeT4;
#else // CRC_NUM_TABLES > 8
g_CrcUpdateT8 = CrcUpdateT1_BeT8;
g_CrcUpdate = CrcUpdateT1_BeT8;
#endif
#if defined(Z7_CRC_UPDATE_T1_FUNC_NAME)
g_Crc_Algo = Z7_CRC_NUM_TABLES_USE;
#endif
#if (!defined(MY_CPU_LE) && !defined(MY_CPU_BE))
g_Crc_Be = 1;
#endif
}
}
#endif // CRC_NUM_TABLES < 4
#endif // !defined(MY_CPU_LE)
#ifdef MY_CPU_LE
#ifdef USE_ARM64_CRC
if (CPU_IsSupported_CRC32())
{
g_CrcUpdateT0_32 = CrcUpdateT0_32;
g_CrcUpdateT0_64 = CrcUpdateT0_64;
g_CrcUpdate =
#if defined(MY_CPU_ARM)
CrcUpdateT0_32;
#else
CrcUpdateT0_64;
#endif
}
#endif
#ifdef USE_CRC_EMU
g_CrcUpdateT0_32 = CrcUpdateT0_32;
g_CrcUpdateT0_64 = CrcUpdateT0_64;
g_CrcUpdate = CrcUpdateT0_64;
#endif
#ifdef MY_CPU_LE
#ifdef Z7_CRC_HW_USE
if (CPU_IsSupported_CRC32())
g_Crc_Algo = 0;
#endif // Z7_CRC_HW_USE
#endif // MY_CPU_LE
#endif // Z7_CRC_NUM_TABLES_USE <= 1
#endif // g_Crc_Algo was declared
}
Z7_CRC_UPDATE_FUNC z7_GetFunc_CrcUpdate(unsigned algo)
{
if (algo == 0)
return &CrcUpdate;
#if defined(Z7_CRC_HW_USE)
if (algo == sizeof(CRC_HW_WORD_TYPE) * 8)
{
#ifdef Z7_CRC_HW_FORCE
return &CrcUpdate;
#else
if (g_Crc_Algo == 0)
return &CrcUpdate_HW;
#endif
}
#endif
#ifndef Z7_CRC_HW_FORCE
if (algo == Z7_CRC_NUM_TABLES_USE)
return
#ifdef Z7_CRC_HW_USE
&CrcUpdate_Base;
#else
&CrcUpdate;
#endif
#endif
return NULL;
}
#undef kCrcPoly
#undef CRC64_NUM_TABLES
#undef Z7_CRC_NUM_TABLES_USE
#undef Z7_CRC_NUM_TABLES_TOTAL
#undef CRC_UPDATE_BYTE_2
#undef FUNC_NAME_LE_2
#undef FUNC_NAME_LE_1
#undef FUNC_NAME_LE
#undef FUNC_NAME_BE_2
#undef FUNC_NAME_BE_1
#undef FUNC_NAME_BE
#undef CRC_HW_UNROLL_BYTES
#undef CRC_HW_WORD_FUNC
#undef CRC_HW_WORD_TYPE
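For orientation: the table-driven scheme that 7zCrc.c builds on is the standard reflected CRC-32. A 256-entry table is generated from the polynomial 0xEDB88320, and each input byte is folded in with table[(crc ^ byte) & 0xFF] ^ (crc >> 8); the slice-by-N tables above extend the same idea to several bytes per step. A minimal standalone sketch in C; function names are illustrative, not the 7-Zip API:
#include <stddef.h>
#include <stdint.h>
/* Reflected CRC-32 (polynomial 0xEDB88320), byte-at-a-time table lookup.
   Same folding step as the CRC_UPDATE_BYTE_2 macro above. */
static uint32_t crc_table[256];
static void crc32_init(void)
{
  for (uint32_t i = 0; i < 256; i++)
  {
    uint32_t r = i;
    for (int j = 0; j < 8; j++)
      r = (r >> 1) ^ (0xEDB88320u & (0u - (r & 1)));
    crc_table[i] = r;
  }
}
static uint32_t crc32_calc(const void *data, size_t size)
{
  const uint8_t *p = (const uint8_t *)data;
  uint32_t crc = 0xFFFFFFFFu;                        /* initial value */
  for (; size != 0; size--, p++)
    crc = crc_table[(crc ^ *p) & 0xFF] ^ (crc >> 8);
  return crc ^ 0xFFFFFFFFu;                          /* final xor */
}
/* After crc32_init(), crc32_calc("123456789", 9) should give the usual check value 0xCBF43926. */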


@@ -1,117 +1,199 @@
/* 7zCrcOpt.c -- CRC32 calculation
2023-04-02 : Igor Pavlov : Public domain */
/* 7zCrcOpt.c -- CRC32 calculation (optimized functions)
2023-12-07 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#if !defined(Z7_CRC_NUM_TABLES) || Z7_CRC_NUM_TABLES > 1
// for debug only : define Z7_CRC_DEBUG_BE to test big-endian code in little-endian cpu
// #define Z7_CRC_DEBUG_BE
#ifdef Z7_CRC_DEBUG_BE
#undef MY_CPU_LE
#define MY_CPU_BE
#endif
// the value Z7_CRC_NUM_TABLES_USE must be defined to same value as in 7zCrc.c
#ifdef Z7_CRC_NUM_TABLES
#define Z7_CRC_NUM_TABLES_USE Z7_CRC_NUM_TABLES
#else
#define Z7_CRC_NUM_TABLES_USE 12
#endif
#if Z7_CRC_NUM_TABLES_USE % 4 || \
Z7_CRC_NUM_TABLES_USE < 4 * 1 || \
Z7_CRC_NUM_TABLES_USE > 4 * 6
#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
#endif
#ifndef MY_CPU_BE
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt32 Z7_FASTCALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 Z7_FASTCALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
#define Q(n, d) \
( (table + ((n) * 4 + 3) * 0x100)[(Byte)(d)] \
^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 1 * 8) & 0xFF] \
^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 2 * 8) & 0xFF] \
^ (table + ((n) * 4 + 0) * 0x100)[((d) >> 3 * 8)] )
#define R(a) *((const UInt32 *)(const void *)p + (a))
#define CRC_FUNC_PRE_LE2(step) \
UInt32 Z7_FASTCALL CrcUpdateT ## step (UInt32 v, const void *data, size_t size, const UInt32 *table)
#define CRC_FUNC_PRE_LE(step) \
CRC_FUNC_PRE_LE2(step); \
CRC_FUNC_PRE_LE2(step)
CRC_FUNC_PRE_LE(Z7_CRC_NUM_TABLES_USE)
{
const Byte *p = (const Byte *)data;
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++)
const Byte *lim;
for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC_NUM_TABLES_USE & 4))) != 0; size--, p++)
v = CRC_UPDATE_BYTE_2(v, *p);
for (; size >= 4; size -= 4, p += 4)
lim = p + size;
if (size >= Z7_CRC_NUM_TABLES_USE)
{
v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x300)[((v ) & 0xFF)]
^ (table + 0x200)[((v >> 8) & 0xFF)]
^ (table + 0x100)[((v >> 16) & 0xFF)]
^ (table + 0x000)[((v >> 24))];
lim -= Z7_CRC_NUM_TABLES_USE;
do
{
v ^= R(0);
{
#if Z7_CRC_NUM_TABLES_USE == 1 * 4
v = Q(0, v);
#else
#define U2(r, op) \
{ d = R(r); x op Q(Z7_CRC_NUM_TABLES_USE / 4 - 1 - (r), d); }
UInt32 d, x;
U2(1, =)
#if Z7_CRC_NUM_TABLES_USE >= 3 * 4
#define U(r) U2(r, ^=)
U(2)
#if Z7_CRC_NUM_TABLES_USE >= 4 * 4
U(3)
#if Z7_CRC_NUM_TABLES_USE >= 5 * 4
U(4)
#if Z7_CRC_NUM_TABLES_USE >= 6 * 4
U(5)
#if Z7_CRC_NUM_TABLES_USE >= 7 * 4
#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
#endif
#endif
#endif
#endif
#endif
#undef U
#undef U2
v = x ^ Q(Z7_CRC_NUM_TABLES_USE / 4 - 1, v);
#endif
}
p += Z7_CRC_NUM_TABLES_USE;
}
while (p <= lim);
lim += Z7_CRC_NUM_TABLES_USE;
}
for (; size > 0; size--, p++)
for (; p < lim; p++)
v = CRC_UPDATE_BYTE_2(v, *p);
return v;
}
UInt32 Z7_FASTCALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 Z7_FASTCALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++)
v = CRC_UPDATE_BYTE_2(v, *p);
for (; size >= 8; size -= 8, p += 8)
{
UInt32 d;
v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x700)[((v ) & 0xFF)]
^ (table + 0x600)[((v >> 8) & 0xFF)]
^ (table + 0x500)[((v >> 16) & 0xFF)]
^ (table + 0x400)[((v >> 24))];
d = *((const UInt32 *)(const void *)p + 1);
v ^=
(table + 0x300)[((d ) & 0xFF)]
^ (table + 0x200)[((d >> 8) & 0xFF)]
^ (table + 0x100)[((d >> 16) & 0xFF)]
^ (table + 0x000)[((d >> 24))];
}
for (; size > 0; size--, p++)
v = CRC_UPDATE_BYTE_2(v, *p);
return v;
}
#undef CRC_UPDATE_BYTE_2
#undef R
#undef Q
#undef CRC_FUNC_PRE_LE
#undef CRC_FUNC_PRE_LE2
#endif
#ifndef MY_CPU_LE
#define CRC_UINT32_SWAP(v) Z7_BSWAP32(v)
#define CRC_UPDATE_BYTE_2_BE(crc, b) (table[((crc) >> 24) ^ (b)] ^ ((crc) << 8))
#define CRC_UPDATE_BYTE_2_BE(crc, b) (table[(((crc) >> 24) ^ (b))] ^ ((crc) << 8))
#define Q(n, d) \
( (table + ((n) * 4 + 0) * 0x100)[((d)) & 0xFF] \
^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \
^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \
^ (table + ((n) * 4 + 3) * 0x100)[((d) >> 3 * 8)] )
UInt32 Z7_FASTCALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
#ifdef Z7_CRC_DEBUG_BE
#define R(a) GetBe32a((const UInt32 *)(const void *)p + (a))
#else
#define R(a) *((const UInt32 *)(const void *)p + (a))
#endif
#define CRC_FUNC_PRE_BE2(step) \
UInt32 Z7_FASTCALL CrcUpdateT1_BeT ## step (UInt32 v, const void *data, size_t size, const UInt32 *table)
#define CRC_FUNC_PRE_BE(step) \
CRC_FUNC_PRE_BE2(step); \
CRC_FUNC_PRE_BE2(step)
CRC_FUNC_PRE_BE(Z7_CRC_NUM_TABLES_USE)
{
const Byte *p = (const Byte *)data;
const Byte *lim;
table += 0x100;
v = CRC_UINT32_SWAP(v);
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++)
v = Z7_BSWAP32(v);
for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC_NUM_TABLES_USE & 4))) != 0; size--, p++)
v = CRC_UPDATE_BYTE_2_BE(v, *p);
for (; size >= 4; size -= 4, p += 4)
lim = p + size;
if (size >= Z7_CRC_NUM_TABLES_USE)
{
v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x000)[((v ) & 0xFF)]
^ (table + 0x100)[((v >> 8) & 0xFF)]
^ (table + 0x200)[((v >> 16) & 0xFF)]
^ (table + 0x300)[((v >> 24))];
lim -= Z7_CRC_NUM_TABLES_USE;
do
{
v ^= R(0);
{
#if Z7_CRC_NUM_TABLES_USE == 1 * 4
v = Q(0, v);
#else
#define U2(r, op) \
{ d = R(r); x op Q(Z7_CRC_NUM_TABLES_USE / 4 - 1 - (r), d); }
UInt32 d, x;
U2(1, =)
#if Z7_CRC_NUM_TABLES_USE >= 3 * 4
#define U(r) U2(r, ^=)
U(2)
#if Z7_CRC_NUM_TABLES_USE >= 4 * 4
U(3)
#if Z7_CRC_NUM_TABLES_USE >= 5 * 4
U(4)
#if Z7_CRC_NUM_TABLES_USE >= 6 * 4
U(5)
#if Z7_CRC_NUM_TABLES_USE >= 7 * 4
#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
#endif
#endif
#endif
#endif
#endif
#undef U
#undef U2
v = x ^ Q(Z7_CRC_NUM_TABLES_USE / 4 - 1, v);
#endif
}
p += Z7_CRC_NUM_TABLES_USE;
}
while (p <= lim);
lim += Z7_CRC_NUM_TABLES_USE;
}
for (; size > 0; size--, p++)
for (; p < lim; p++)
v = CRC_UPDATE_BYTE_2_BE(v, *p);
return CRC_UINT32_SWAP(v);
return Z7_BSWAP32(v);
}
UInt32 Z7_FASTCALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
table += 0x100;
v = CRC_UINT32_SWAP(v);
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++)
v = CRC_UPDATE_BYTE_2_BE(v, *p);
for (; size >= 8; size -= 8, p += 8)
{
UInt32 d;
v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x400)[((v ) & 0xFF)]
^ (table + 0x500)[((v >> 8) & 0xFF)]
^ (table + 0x600)[((v >> 16) & 0xFF)]
^ (table + 0x700)[((v >> 24))];
d = *((const UInt32 *)(const void *)p + 1);
v ^=
(table + 0x000)[((d ) & 0xFF)]
^ (table + 0x100)[((d >> 8) & 0xFF)]
^ (table + 0x200)[((d >> 16) & 0xFF)]
^ (table + 0x300)[((d >> 24))];
}
for (; size > 0; size--, p++)
v = CRC_UPDATE_BYTE_2_BE(v, *p);
return CRC_UINT32_SWAP(v);
}
#undef CRC_UPDATE_BYTE_2_BE
#undef R
#undef Q
#undef CRC_FUNC_PRE_BE
#undef CRC_FUNC_PRE_BE2
#endif
#undef Z7_CRC_NUM_TABLES_USE
#endif


@@ -1,5 +1,5 @@
/* 7zDec.c -- Decoding from 7z folder
2023-04-02 : Igor Pavlov : Public domain */
2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -51,6 +51,7 @@
#ifndef Z7_NO_METHODS_FILTERS
#define k_Delta 3
#define k_RISCV 0xb
#define k_BCJ 0x3030103
#define k_PPC 0x3030205
#define k_IA64 0x3030401
@@ -362,6 +363,7 @@ static SRes CheckSupportedFolder(const CSzFolder *f)
case k_IA64:
case k_SPARC:
case k_ARM:
case k_RISCV:
#endif
#ifdef Z7_USE_FILTER_ARM64
case k_ARM64:
@@ -535,10 +537,10 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
}
}
}
#if defined(Z7_USE_BRANCH_FILTER)
#if defined(Z7_USE_BRANCH_FILTER)
else if (ci == 1)
{
#if !defined(Z7_NO_METHODS_FILTERS)
#if !defined(Z7_NO_METHODS_FILTERS)
if (coder->MethodID == k_Delta)
{
if (coder->PropsSize != 1)
@@ -550,22 +552,43 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
}
continue;
}
#endif
#endif
#ifdef Z7_USE_FILTER_ARM64
#ifdef Z7_USE_FILTER_ARM64
if (coder->MethodID == k_ARM64)
{
UInt32 pc = 0;
if (coder->PropsSize == 4)
{
pc = GetUi32(propsData + coder->PropsOffset);
if (pc & 3)
return SZ_ERROR_UNSUPPORTED;
}
else if (coder->PropsSize != 0)
return SZ_ERROR_UNSUPPORTED;
z7_BranchConv_ARM64_Dec(outBuffer, outSize, pc);
continue;
}
#endif
#if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT)
#endif
#if !defined(Z7_NO_METHODS_FILTERS)
if (coder->MethodID == k_RISCV)
{
UInt32 pc = 0;
if (coder->PropsSize == 4)
{
pc = GetUi32(propsData + coder->PropsOffset);
if (pc & 1)
return SZ_ERROR_UNSUPPORTED;
}
else if (coder->PropsSize != 0)
return SZ_ERROR_UNSUPPORTED;
z7_BranchConv_RISCV_Dec(outBuffer, outSize, pc);
continue;
}
#endif
#if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT)
{
if (coder->PropsSize != 0)
return SZ_ERROR_UNSUPPORTED;
@@ -579,7 +602,8 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
z7_BranchConvSt_X86_Dec(outBuffer, outSize, 0, &state); // pc = 0
break;
}
CASE_BRA_CONV(PPC)
case k_PPC: Z7_BRANCH_CONV_DEC_2(BranchConv_PPC)(outBuffer, outSize, 0); break; // pc = 0;
// CASE_BRA_CONV(PPC)
CASE_BRA_CONV(IA64)
CASE_BRA_CONV(SPARC)
CASE_BRA_CONV(ARM)
@@ -592,9 +616,9 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
}
continue;
}
#endif
#endif
} // (c == 1)
#endif
#endif // Z7_USE_BRANCH_FILTER
else
return SZ_ERROR_UNSUPPORTED;
}


@@ -1,5 +1,5 @@
/* Aes.c -- AES encryption / decryption
2023-04-02 : Igor Pavlov : Public domain */
2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -13,7 +13,9 @@ AES_CODE_FUNC g_AesCtr_Code;
UInt32 g_Aes_SupportedFunctions_Flags;
#endif
MY_ALIGN(64)
static UInt32 T[256 * 4];
MY_ALIGN(64)
static const Byte Sbox[256] = {
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
@@ -33,7 +35,9 @@ static const Byte Sbox[256] = {
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};
MY_ALIGN(64)
static UInt32 D[256 * 4];
MY_ALIGN(64)
static Byte InvS[256];
#define xtime(x) ((((x) << 1) ^ (((x) & 0x80) != 0 ? 0x1B : 0)) & 0xFF)
@@ -54,24 +58,54 @@ static Byte InvS[256];
// #define Z7_SHOW_AES_STATUS
#ifdef MY_CPU_X86_OR_AMD64
#define USE_HW_AES
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
#if defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define USE_HW_AES
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#if defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1110)
#define USE_HW_AES
#if (__INTEL_COMPILER >= 1900)
#define USE_HW_VAES
#endif
#endif
#elif defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40400)
#define USE_HW_AES
#if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 8)
#define USE_HW_VAES
#endif
#elif defined(_MSC_VER)
#if _MSC_VER >= 1910
#define USE_HW_AES
#define USE_HW_VAES
#endif
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
#if defined(__ARM_FEATURE_AES) \
|| defined(__ARM_FEATURE_CRYPTO)
#define USE_HW_AES
#else
#if defined(MY_CPU_ARM64) \
|| defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
|| defined(Z7_MSC_VER_ORIGINAL)
#if defined(__ARM_FP) && \
( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|| defined(__GNUC__) && (__GNUC__ >= 6) \
) \
|| defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
#if defined(MY_CPU_ARM64) \
|| !defined(Z7_CLANG_VERSION) \
|| defined(__ARM_NEON) && \
(Z7_CLANG_VERSION < 170000 || \
Z7_CLANG_VERSION > 170001)
#define USE_HW_AES
#endif
#endif
#endif
#endif
#endif
#ifdef USE_HW_AES
// #pragma message("=== Aes.c USE_HW_AES === ")
#ifdef Z7_SHOW_AES_STATUS
#include <stdio.h>
#define PRF(x) x
@@ -136,6 +170,7 @@ void AesGenTables(void)
#endif
#ifdef MY_CPU_X86_OR_AMD64
#ifdef USE_HW_VAES
if (CPU_IsSupported_VAES_AVX2())
{
PRF(printf("\n===vaes avx2\n"));
@@ -146,6 +181,7 @@ void AesGenTables(void)
#endif
}
#endif
#endif
}
#endif


@@ -1,5 +1,5 @@
/* AesOpt.c -- AES optimized code for x86 AES hardware instructions
2023-04-02 : Igor Pavlov : Public domain */
2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -15,8 +15,8 @@
#define USE_INTEL_VAES
#endif
#endif
#elif defined(__clang__) && (__clang_major__ > 3 || __clang_major__ == 3 && __clang_minor__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4)
#elif defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40400)
#define USE_INTEL_AES
#if !defined(__AES__)
#define ATTRIB_AES __attribute__((__target__("aes")))
@@ -35,27 +35,37 @@
#define USE_INTEL_VAES
#endif
#endif
#ifndef USE_INTEL_AES
#define Z7_USE_AES_HW_STUB
#endif
#ifndef USE_INTEL_VAES
#define Z7_USE_VAES_HW_STUB
#endif
#endif
#ifndef ATTRIB_AES
#define ATTRIB_AES
#endif
#ifndef ATTRIB_VAES
#define ATTRIB_VAES
#endif
#ifndef USE_INTEL_AES
// #define Z7_USE_AES_HW_STUB // for debug
#endif
#ifndef USE_INTEL_VAES
// #define Z7_USE_VAES_HW_STUB // for debug
#endif
#ifdef USE_INTEL_AES
#include <wmmintrin.h>
#ifndef USE_INTEL_VAES
#if !defined(USE_INTEL_VAES) && defined(Z7_USE_VAES_HW_STUB)
#define AES_TYPE_keys UInt32
#define AES_TYPE_data Byte
// #define AES_TYPE_keys __m128i
// #define AES_TYPE_data __m128i
#endif
#ifndef ATTRIB_AES
#define ATTRIB_AES
#endif
#define AES_FUNC_START(name) \
void Z7_FASTCALL name(UInt32 *ivAes, Byte *data8, size_t numBlocks)
// void Z7_FASTCALL name(__m128i *p, __m128i *data, size_t numBlocks)
@@ -69,8 +79,6 @@ AES_FUNC_START (name)
#define MM_OP_m(op, src) MM_OP(op, m, src)
#define MM_XOR( dest, src) MM_OP(_mm_xor_si128, dest, src)
#define AVX_XOR(dest, src) MM_OP(_mm256_xor_si256, dest, src)
AES_FUNC_START2 (AesCbc_Encode_HW)
{
@@ -139,11 +147,6 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1])
#endif
#define AVX_DECLARE_VAR(reg, ii) __m256i reg;
#define AVX_LOAD_data( reg, ii) reg = ((const __m256i *)(const void *)data)[ii];
#define AVX_STORE_data( reg, ii) ((__m256i *)(void *)data)[ii] = reg;
#define AVX_XOR_data_M1(reg, ii) AVX_XOR (reg, (((const __m256i *)(const void *)(data - 1))[ii]))
#define MM_OP_key(op, reg) MM_OP(op, reg, key);
#define AES_DEC( reg, ii) MM_OP_key (_mm_aesdec_si128, reg)
@@ -152,27 +155,13 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
#define AES_ENC_LAST( reg, ii) MM_OP_key (_mm_aesenclast_si128, reg)
#define AES_XOR( reg, ii) MM_OP_key (_mm_xor_si128, reg)
#define AVX_AES_DEC( reg, ii) MM_OP_key (_mm256_aesdec_epi128, reg)
#define AVX_AES_DEC_LAST( reg, ii) MM_OP_key (_mm256_aesdeclast_epi128, reg)
#define AVX_AES_ENC( reg, ii) MM_OP_key (_mm256_aesenc_epi128, reg)
#define AVX_AES_ENC_LAST( reg, ii) MM_OP_key (_mm256_aesenclast_epi128, reg)
#define AVX_AES_XOR( reg, ii) MM_OP_key (_mm256_xor_si256, reg)
#define CTR_START(reg, ii) MM_OP (_mm_add_epi64, ctr, one) reg = ctr;
#define CTR_END( reg, ii) MM_XOR (data[ii], reg)
#define AVX_CTR_START(reg, ii) MM_OP (_mm256_add_epi64, ctr2, two) reg = _mm256_xor_si256(ctr2, key);
#define AVX_CTR_END( reg, ii) AVX_XOR (((__m256i *)(void *)data)[ii], reg)
#define WOP_KEY(op, n) { \
const __m128i key = w[n]; \
WOP(op); }
#define AVX_WOP_KEY(op, n) { \
const __m256i key = w[n]; \
WOP(op); }
#define WIDE_LOOP_START \
dataEnd = data + numBlocks; \
@@ -190,6 +179,40 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
for (; data < dataEnd; data++)
#ifdef USE_INTEL_VAES
#define AVX_XOR(dest, src) MM_OP(_mm256_xor_si256, dest, src)
#define AVX_DECLARE_VAR(reg, ii) __m256i reg;
#define AVX_LOAD_data( reg, ii) reg = ((const __m256i *)(const void *)data)[ii];
#define AVX_STORE_data( reg, ii) ((__m256i *)(void *)data)[ii] = reg;
/*
AVX_XOR_data_M1() needs unaligned memory load
if (we don't use _mm256_loadu_si256() here)
{
Most compilers with enabled optimizations generate fused AVX (LOAD + OP)
instruction that can load unaligned data.
But GCC and CLANG without -O2 or -O1 optimizations can generate separated
LOAD-ALIGNED (vmovdqa) instruction that will fail on execution.
}
Note: some compilers generate more instructions, if we use _mm256_loadu_si256() here.
v23.02: we use _mm256_loadu_si256() here, because we need compatibility with any compiler.
*/
#define AVX_XOR_data_M1(reg, ii) AVX_XOR (reg, _mm256_loadu_si256(&(((const __m256i *)(const void *)(data - 1))[ii])))
// for debug only: the following code will fail on execution, if compiled by some compilers:
// #define AVX_XOR_data_M1(reg, ii) AVX_XOR (reg, (((const __m256i *)(const void *)(data - 1))[ii]))
#define AVX_AES_DEC( reg, ii) MM_OP_key (_mm256_aesdec_epi128, reg)
#define AVX_AES_DEC_LAST( reg, ii) MM_OP_key (_mm256_aesdeclast_epi128, reg)
#define AVX_AES_ENC( reg, ii) MM_OP_key (_mm256_aesenc_epi128, reg)
#define AVX_AES_ENC_LAST( reg, ii) MM_OP_key (_mm256_aesenclast_epi128, reg)
#define AVX_AES_XOR( reg, ii) MM_OP_key (_mm256_xor_si256, reg)
#define AVX_CTR_START(reg, ii) MM_OP (_mm256_add_epi64, ctr2, two) reg = _mm256_xor_si256(ctr2, key);
#define AVX_CTR_END( reg, ii) AVX_XOR (((__m256i *)(void *)data)[ii], reg)
#define AVX_WOP_KEY(op, n) { \
const __m256i key = w[n]; \
WOP(op); }
#define NUM_AES_KEYS_MAX 15
#define WIDE_LOOP_START_AVX(OP) \
@@ -214,6 +237,9 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
/* MSVC for x86: If we don't call _mm256_zeroupper(), and -arch:IA32 is not specified,
MSVC still can insert vzeroupper instruction. */
#endif
AES_FUNC_START2 (AesCbc_Decode_HW)
{
@@ -380,6 +406,9 @@ required that <immintrin.h> must be included before <avxintrin.h>.
#endif
#endif // __clang__ && _MSC_VER
#ifndef ATTRIB_VAES
#define ATTRIB_VAES
#endif
#define VAES_FUNC_START2(name) \
AES_FUNC_START (name); \
@@ -519,10 +548,18 @@ VAES_FUNC_START2 (AesCtr_Code_HW_256)
/* no USE_INTEL_AES */
#if defined(Z7_USE_AES_HW_STUB)
// We can compile this file with another C compiler,
// or we can compile asm version.
// So we can generate real code instead of this stub function.
// #if defined(_MSC_VER)
#pragma message("AES HW_SW stub was used")
// #endif
#if !defined(USE_INTEL_VAES) && defined(Z7_USE_VAES_HW_STUB)
#define AES_TYPE_keys UInt32
#define AES_TYPE_data Byte
#endif
#define AES_FUNC_START(name) \
void Z7_FASTCALL name(UInt32 *p, Byte *data, size_t numBlocks) \
@@ -535,13 +572,16 @@ VAES_FUNC_START2 (AesCtr_Code_HW_256)
AES_COMPAT_STUB (AesCbc_Encode)
AES_COMPAT_STUB (AesCbc_Decode)
AES_COMPAT_STUB (AesCtr_Code)
#endif // Z7_USE_AES_HW_STUB
#endif // USE_INTEL_AES
#ifndef USE_INTEL_VAES
#if defined(Z7_USE_VAES_HW_STUB)
// #if defined(_MSC_VER)
#pragma message("VAES HW_SW stub was used")
// #endif
#define VAES_COMPAT_STUB(name) \
void Z7_FASTCALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks); \
@@ -550,36 +590,59 @@ AES_COMPAT_STUB (AesCtr_Code)
VAES_COMPAT_STUB (AesCbc_Decode_HW)
VAES_COMPAT_STUB (AesCtr_Code_HW)
#endif
#endif // ! USE_INTEL_VAES
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
#if defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#if defined(__ARM_FEATURE_AES) \
|| defined(__ARM_FEATURE_CRYPTO)
#define USE_HW_AES
#else
#if defined(MY_CPU_ARM64) \
|| defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
|| defined(Z7_MSC_VER_ORIGINAL)
#if defined(__ARM_FP) && \
( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|| defined(__GNUC__) && (__GNUC__ >= 6) \
) \
|| defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
#if defined(MY_CPU_ARM64) \
|| !defined(Z7_CLANG_VERSION) \
|| defined(__ARM_NEON) && \
(Z7_CLANG_VERSION < 170000 || \
Z7_CLANG_VERSION > 170001)
#define USE_HW_AES
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define USE_HW_AES
#endif
#elif defined(_MSC_VER)
#if _MSC_VER >= 1910
#define USE_HW_AES
#endif
#endif
#ifdef USE_HW_AES
// #pragma message("=== AES HW === ")
// __ARM_FEATURE_CRYPTO macro is deprecated in favor of the finer grained feature macro __ARM_FEATURE_AES
#if defined(__clang__) || defined(__GNUC__)
#if !defined(__ARM_FEATURE_AES) && \
!defined(__ARM_FEATURE_CRYPTO)
#ifdef MY_CPU_ARM64
#define ATTRIB_AES __attribute__((__target__("+crypto,aes")))
#if defined(__clang__)
#define ATTRIB_AES __attribute__((__target__("crypto")))
#else
#define ATTRIB_AES __attribute__((__target__("+crypto")))
#endif
#else
#if defined(__clang__)
#define ATTRIB_AES __attribute__((__target__("armv8-a,aes")))
#else
#define ATTRIB_AES __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
#endif
#endif
#endif
#else
// _MSC_VER
// for arm32
@@ -590,11 +653,59 @@ VAES_COMPAT_STUB (AesCtr_Code_HW)
#define ATTRIB_AES
#endif
#if defined(_MSC_VER) && !defined(__clang__) && defined(MY_CPU_ARM64)
#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
/*
clang-17.0.1: error : Cannot select: intrinsic %llvm.arm.neon.aese
clang
3.8.1 : __ARM_NEON : defined(__ARM_FEATURE_CRYPTO)
7.0.1 : __ARM_NEON : __ARM_ARCH >= 8 && defined(__ARM_FEATURE_CRYPTO)
11.?.0 : __ARM_NEON && __ARM_FP : __ARM_ARCH >= 8 && defined(__ARM_FEATURE_CRYPTO)
13.0.1 : __ARM_NEON && __ARM_FP : __ARM_ARCH >= 8 && defined(__ARM_FEATURE_AES)
16 : __ARM_NEON && __ARM_FP : __ARM_ARCH >= 8
*/
#if defined(__clang__) && __clang_major__ < 16
#if !defined(__ARM_FEATURE_AES) && \
!defined(__ARM_FEATURE_CRYPTO)
// #pragma message("=== we set __ARM_FEATURE_CRYPTO 1 === ")
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define Z7_ARM_FEATURE_CRYPTO_WAS_SET 1
// #if defined(__clang__) && __clang_major__ < 13
#define __ARM_FEATURE_CRYPTO 1
// #else
#define __ARM_FEATURE_AES 1
// #endif
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif // clang
#if defined(__clang__)
#if defined(__ARM_ARCH) && __ARM_ARCH < 8
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
// #pragma message("#define __ARM_ARCH 8")
#undef __ARM_ARCH
#define __ARM_ARCH 8
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif // clang
#include <arm_neon.h>
#if defined(Z7_ARM_FEATURE_CRYPTO_WAS_SET) && \
defined(__ARM_FEATURE_CRYPTO) && \
defined(__ARM_FEATURE_AES)
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#undef __ARM_FEATURE_CRYPTO
#undef __ARM_FEATURE_AES
#undef Z7_ARM_FEATURE_CRYPTO_WAS_SET
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
// #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ")
#endif
#endif // Z7_MSC_VER_ORIGINAL
typedef uint8x16_t v128;
@@ -620,7 +731,7 @@ AES_FUNC_START (name)
AES_FUNC_START2 (AesCbc_Encode_HW)
{
v128 *p = (v128*)(void*)ivAes;
v128 * const p = (v128*)(void*)ivAes;
v128 *data = (v128*)(void*)data8;
v128 m = *p;
const v128 k0 = p[2];
@@ -639,7 +750,7 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
const v128 k_z0 = w[2];
for (; numBlocks != 0; numBlocks--, data++)
{
MM_XOR_m (*data);
MM_XOR_m (*data)
AES_E_MC_m (k0)
AES_E_MC_m (k1)
AES_E_MC_m (k2)
@@ -660,7 +771,7 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
}
}
AES_E_m (k_z1)
MM_XOR_m (k_z0);
MM_XOR_m (k_z0)
*data = m;
}
*p = m;
@@ -745,7 +856,7 @@ AES_FUNC_START2 (AesCbc_Decode_HW)
while (w != p);
WOP_KEY (AES_D, 1)
WOP_KEY (AES_XOR, 0)
MM_XOR (m0, iv);
MM_XOR (m0, iv)
WOP_M1 (XOR_data_M1)
iv = data[NUM_WAYS - 1];
WOP (STORE_data)
@@ -759,14 +870,14 @@ AES_FUNC_START2 (AesCbc_Decode_HW)
AES_D_IMC_m (w[2])
do
{
AES_D_IMC_m (w[1]);
AES_D_IMC_m (w[0]);
AES_D_IMC_m (w[1])
AES_D_IMC_m (w[0])
w -= 2;
}
while (w != p);
AES_D_m (w[1]);
MM_XOR_m (w[0]);
MM_XOR_m (iv);
AES_D_m (w[1])
MM_XOR_m (w[0])
MM_XOR_m (iv)
iv = *data;
*data = m;
}
@@ -783,6 +894,12 @@ AES_FUNC_START2 (AesCtr_Code_HW)
const v128 *wEnd = p + ((size_t)*(const UInt32 *)(p + 1)) * 2;
const v128 *dataEnd;
uint64x2_t one = vdupq_n_u64(0);
// the bug in clang:
// __builtin_neon_vsetq_lane_i64(__s0, (int8x16_t)__s1, __p2);
#if defined(__clang__) && (__clang_major__ <= 9)
#pragma GCC diagnostic ignored "-Wvector-conversion"
#endif
one = vsetq_lane_u64(1, one, 0);
p += 2;
@@ -809,11 +926,11 @@ AES_FUNC_START2 (AesCtr_Code_HW)
{
const v128 *w = p;
v128 m;
CTR_START (m, 0);
CTR_START (m, 0)
do
{
AES_E_MC_m (w[0]);
AES_E_MC_m (w[1]);
AES_E_MC_m (w[0])
AES_E_MC_m (w[1])
w += 2;
}
while (w != wEnd);


@@ -1,5 +1,5 @@
/* Alloc.c -- Memory allocation functions
2023-04-02 : Igor Pavlov : Public domain */
2024-02-18 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -10,19 +10,18 @@
#include "Alloc.h"
#ifdef _WIN32
#ifdef Z7_LARGE_PAGES
#if defined(__clang__) || defined(__GNUC__)
typedef void (*Z7_voidFunction)(void);
#define MY_CAST_FUNC (Z7_voidFunction)
#elif defined(_MSC_VER) && _MSC_VER > 1920
#define MY_CAST_FUNC (void *)
// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()'
#else
#define MY_CAST_FUNC
#if defined(Z7_LARGE_PAGES) && defined(_WIN32) && \
(!defined(Z7_WIN32_WINNT_MIN) || Z7_WIN32_WINNT_MIN < 0x0502) // < Win2003 (xp-64)
#define Z7_USE_DYN_GetLargePageMinimum
#endif
// for debug:
#if 0
#if defined(__CHERI__) && defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
// #pragma message("=== Z7_ALLOC_NO_OFFSET_ALLOCATOR === ")
#define Z7_ALLOC_NO_OFFSET_ALLOCATOR
#endif
#endif
#endif // Z7_LARGE_PAGES
#endif // _WIN32
// #define SZ_ALLOC_DEBUG
/* #define SZ_ALLOC_DEBUG */
@@ -146,7 +145,9 @@ static void PrintAddr(void *p)
#define PRINT_FREE(name, cnt, ptr)
#define Print(s)
#define PrintLn()
#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
#define PrintHex(v, align)
#endif
#define PrintAddr(p)
#endif
@@ -246,9 +247,9 @@ void MidFree(void *address)
#ifdef Z7_LARGE_PAGES
#ifdef MEM_LARGE_PAGES
#define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES
#define MY_MEM_LARGE_PAGES MEM_LARGE_PAGES
#else
#define MY__MEM_LARGE_PAGES 0x20000000
#define MY_MEM_LARGE_PAGES 0x20000000
#endif
extern
@@ -258,19 +259,23 @@ typedef SIZE_T (WINAPI *Func_GetLargePageMinimum)(VOID);
void SetLargePageSize(void)
{
#ifdef Z7_LARGE_PAGES
SIZE_T size;
#ifdef Z7_USE_DYN_GetLargePageMinimum
Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
const
Func_GetLargePageMinimum fn =
(Func_GetLargePageMinimum) MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
(Func_GetLargePageMinimum) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
"GetLargePageMinimum");
if (!fn)
return;
size = fn();
#else
size = GetLargePageMinimum();
#endif
if (size == 0 || (size & (size - 1)) != 0)
return;
g_LargePageSize = size;
#endif
}
#endif // Z7_LARGE_PAGES
@@ -292,7 +297,7 @@ void *BigAlloc(size_t size)
size2 = (size + ps) & ~ps;
if (size2 >= size)
{
void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE);
void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY_MEM_LARGE_PAGES, PAGE_READWRITE);
if (p)
{
PRINT_ALLOC("Alloc-BM ", g_allocCountMid, size2, p)
@@ -328,20 +333,7 @@ const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
#endif
/*
uintptr_t : <stdint.h> C99 (optional)
: unsupported in VS6
*/
#ifdef _WIN32
typedef UINT_PTR UIntPtr;
#else
/*
typedef uintptr_t UIntPtr;
*/
typedef ptrdiff_t UIntPtr;
#endif
#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
#define ADJUST_ALLOC_SIZE 0
/*
@@ -352,14 +344,36 @@ const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
MyAlloc() can return address that is NOT multiple of sizeof(void *).
*/
/*
#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1))))
uintptr_t : <stdint.h> C99 (optional)
: unsupported in VS6
*/
#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1))))
typedef
#ifdef _WIN32
UINT_PTR
#elif 1
uintptr_t
#else
ptrdiff_t
#endif
MY_uintptr_t;
#if 0 \
|| (defined(__CHERI__) \
|| defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ > 8))
// for 128-bit pointers (cheri):
#define MY_ALIGN_PTR_DOWN(p, align) \
((void *)((char *)(p) - ((size_t)(MY_uintptr_t)(p) & ((align) - 1))))
#else
#define MY_ALIGN_PTR_DOWN(p, align) \
((void *)((((MY_uintptr_t)(p)) & ~((MY_uintptr_t)(align) - 1))))
#endif
#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)
#endif
#if !defined(_WIN32) \
&& (defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) \
|| defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L))
#define USE_posix_memalign
#endif
@@ -399,14 +413,13 @@ static int posix_memalign(void **ptr, size_t align, size_t size)
#define ALLOC_ALIGN_SIZE ((size_t)1 << 7)
static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
void *z7_AlignedAlloc(size_t size)
{
#ifndef USE_posix_memalign
#ifndef USE_posix_memalign
void *p;
void *pAligned;
size_t newSize;
UNUSED_VAR(pp)
/* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
block to prevent cache line sharing with another allocated blocks */
@@ -431,10 +444,9 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
return pAligned;
#else
#else
void *p;
UNUSED_VAR(pp)
if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size))
return NULL;
@@ -443,19 +455,37 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
return p;
#endif
#endif
}
void z7_AlignedFree(void *address)
{
#ifndef USE_posix_memalign
if (address)
MyFree(((void **)address)[-1]);
#else
free(address);
#endif
}
static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
{
UNUSED_VAR(pp)
return z7_AlignedAlloc(size);
}
static void SzAlignedFree(ISzAllocPtr pp, void *address)
{
UNUSED_VAR(pp)
#ifndef USE_posix_memalign
#ifndef USE_posix_memalign
if (address)
MyFree(((void **)address)[-1]);
#else
#else
free(address);
#endif
#endif
}
@@ -463,16 +493,44 @@ const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree };
#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *))
/* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */
#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1]
/*
#define REAL_BLOCK_PTR_VAR(p) ((void **)(p))[-1]
*/
#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
#if 1
#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *))
#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1]
#else
// we can use this simplified code,
// if (CAlignOffsetAlloc::offset == (k * sizeof(void *))
#define REAL_BLOCK_PTR_VAR(p) (((void **)(p))[-1])
#endif
#endif
#if 0
#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
#include <stdio.h>
static void PrintPtr(const char *s, const void *p)
{
const Byte *p2 = (const Byte *)&p;
unsigned i;
printf("%s %p ", s, p);
for (i = sizeof(p); i != 0;)
{
i--;
printf("%02x", p2[i]);
}
printf("\n");
}
#endif
#endif
static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
{
#if defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR)
UNUSED_VAR(pp)
return z7_AlignedAlloc(size);
#else
const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
void *adr;
void *pAligned;
@@ -501,6 +559,12 @@ static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr +
alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset;
#if 0
printf("\nalignSize = %6x, offset=%6x, size=%8x \n", (unsigned)alignSize, (unsigned)p->offset, (unsigned)size);
PrintPtr("base", adr);
PrintPtr("alig", pAligned);
#endif
PrintLn();
Print("- Aligned: ");
Print(" size="); PrintHex(size, 8);
@@ -512,11 +576,16 @@ static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
REAL_BLOCK_PTR_VAR(pAligned) = adr;
return pAligned;
#endif
}
static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address)
{
#if defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR)
UNUSED_VAR(pp)
z7_AlignedFree(address);
#else
if (address)
{
const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
@@ -525,6 +594,7 @@ static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address)
PrintLn();
ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address));
}
#endif
}
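For orientation: the SzAlignedAlloc / AlignOffsetAlloc path above uses the classic over-allocate-and-stash trick, that is, allocate extra bytes, round the pointer up to the required alignment, and keep the original malloc() pointer just below the aligned block so the free side can recover it (what REAL_BLOCK_PTR_VAR reads back). A minimal standalone sketch of the same idea; names are illustrative, not the 7-Zip API:
#include <stdint.h>
#include <stdlib.h>
/* Allocate 'size' bytes aligned to 'align' (a power of two, >= sizeof(void *)).
   The pointer returned by malloc() is stored just before the aligned block. */
static void *aligned_alloc_sketch(size_t size, size_t align)
{
  void *base = malloc(size + align + sizeof(void *));
  if (!base)
    return NULL;
  {
    const uintptr_t raw = (uintptr_t)base + sizeof(void *);
    const uintptr_t aligned = (raw + align - 1) & ~(uintptr_t)(align - 1);
    ((void **)aligned)[-1] = base;   /* remember where the block really starts */
    return (void *)aligned;
  }
}
static void aligned_free_sketch(void *p)
{
  if (p)
    free(((void **)p)[-1]);          /* recover and free the original pointer */
}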


@@ -1,11 +1,11 @@
/* Bra.c -- Branch converters for RISC code
2023-04-02 : Igor Pavlov : Public domain */
2024-01-20 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Bra.h"
#include "CpuArch.h"
#include "RotateDefs.h"
#include "CpuArch.h"
#if defined(MY_CPU_SIZEOF_POINTER) \
&& ( MY_CPU_SIZEOF_POINTER == 4 \
@@ -26,7 +26,7 @@
#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;
#define Z7_BRANCH_CONV(name) z7_BranchConv_ ## name
#define Z7_BRANCH_CONV(name) z7_ ## name
#define Z7_BRANCH_FUNC_MAIN(name) \
static \
@@ -42,11 +42,11 @@ Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
#ifdef Z7_EXTRACT_ONLY
#define Z7_BRANCH_FUNCS_IMP(name) \
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0)
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0)
#else
#define Z7_BRANCH_FUNCS_IMP(name) \
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) \
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC, 1)
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) \
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC_2, 1)
#endif
#if defined(__clang__)
@@ -72,7 +72,7 @@ Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
#endif
Z7_BRANCH_FUNC_MAIN(ARM64)
Z7_BRANCH_FUNC_MAIN(BranchConv_ARM64)
{
// Byte *p = data;
const Byte *lim;
@@ -121,10 +121,10 @@ Z7_BRANCH_FUNC_MAIN(ARM64)
}
}
}
Z7_BRANCH_FUNCS_IMP(ARM64)
Z7_BRANCH_FUNCS_IMP(BranchConv_ARM64)
Z7_BRANCH_FUNC_MAIN(ARM)
Z7_BRANCH_FUNC_MAIN(BranchConv_ARM)
{
// Byte *p = data;
const Byte *lim;
@@ -152,10 +152,10 @@ Z7_BRANCH_FUNC_MAIN(ARM)
}
}
}
Z7_BRANCH_FUNCS_IMP(ARM)
Z7_BRANCH_FUNCS_IMP(BranchConv_ARM)
Z7_BRANCH_FUNC_MAIN(PPC)
Z7_BRANCH_FUNC_MAIN(BranchConv_PPC)
{
// Byte *p = data;
const Byte *lim;
@@ -192,14 +192,14 @@ Z7_BRANCH_FUNC_MAIN(PPC)
}
}
}
Z7_BRANCH_FUNCS_IMP(PPC)
Z7_BRANCH_FUNCS_IMP(BranchConv_PPC)
#ifdef Z7_CPU_FAST_ROTATE_SUPPORTED
#define BR_SPARC_USE_ROTATE
#endif
Z7_BRANCH_FUNC_MAIN(SPARC)
Z7_BRANCH_FUNC_MAIN(BranchConv_SPARC)
{
// Byte *p = data;
const Byte *lim;
@@ -254,10 +254,10 @@ Z7_BRANCH_FUNC_MAIN(SPARC)
}
}
}
Z7_BRANCH_FUNCS_IMP(SPARC)
Z7_BRANCH_FUNCS_IMP(BranchConv_SPARC)
Z7_BRANCH_FUNC_MAIN(ARMT)
Z7_BRANCH_FUNC_MAIN(BranchConv_ARMT)
{
// Byte *p = data;
Byte *lim;
@@ -335,12 +335,12 @@ Z7_BRANCH_FUNC_MAIN(ARMT)
// return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2));
// return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2));
}
Z7_BRANCH_FUNCS_IMP(ARMT)
Z7_BRANCH_FUNCS_IMP(BranchConv_ARMT)
// #define BR_IA64_NO_INLINE
Z7_BRANCH_FUNC_MAIN(IA64)
Z7_BRANCH_FUNC_MAIN(BranchConv_IA64)
{
// Byte *p = data;
const Byte *lim;
@@ -417,4 +417,293 @@ Z7_BRANCH_FUNC_MAIN(IA64)
}
}
}
Z7_BRANCH_FUNCS_IMP(IA64)
Z7_BRANCH_FUNCS_IMP(BranchConv_IA64)
#define BR_CONVERT_VAL_ENC(v) v += BR_PC_GET;
#define BR_CONVERT_VAL_DEC(v) v -= BR_PC_GET;
#if 1 && defined(MY_CPU_LE_UNALIGN)
#define RISCV_USE_UNALIGNED_LOAD
#endif
#ifdef RISCV_USE_UNALIGNED_LOAD
#define RISCV_GET_UI32(p) GetUi32(p)
#define RISCV_SET_UI32(p, v) { SetUi32(p, v) }
#else
#define RISCV_GET_UI32(p) \
((UInt32)GetUi16a(p) + \
((UInt32)GetUi16a((p) + 2) << 16))
#define RISCV_SET_UI32(p, v) { \
SetUi16a(p, (UInt16)(v)) \
SetUi16a((p) + 2, (UInt16)(v >> 16)) }
#endif
#if 1 && defined(MY_CPU_LE)
#define RISCV_USE_16BIT_LOAD
#endif
#ifdef RISCV_USE_16BIT_LOAD
#define RISCV_LOAD_VAL(p) GetUi16a(p)
#else
#define RISCV_LOAD_VAL(p) (*(p))
#endif
#define RISCV_INSTR_SIZE 2
#define RISCV_STEP_1 (4 + RISCV_INSTR_SIZE)
#define RISCV_STEP_2 4
#define RISCV_REG_VAL (2 << 7)
#define RISCV_CMD_VAL 3
#if 1
// for code size optimization:
#define RISCV_DELTA_7F 0x7f
#else
#define RISCV_DELTA_7F 0
#endif
#define RISCV_CHECK_1(v, b) \
(((((b) - RISCV_CMD_VAL) ^ ((v) << 8)) & (0xf8000 + RISCV_CMD_VAL)) == 0)
#if 1
#define RISCV_CHECK_2(v, r) \
((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL | 8)) \
<< 18) \
< ((r) & 0x1d))
#else
// this branch gives larger code, because
// compilers generate larger code for big constants.
#define RISCV_CHECK_2(v, r) \
((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
& ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
< ((r) & 0x1d))
#endif
#define RISCV_SCAN_LOOP \
Byte *lim; \
size &= ~(SizeT)(RISCV_INSTR_SIZE - 1); \
if (size <= 6) return p; \
size -= 6; \
lim = p + size; \
BR_PC_INIT \
for (;;) \
{ \
UInt32 a, v; \
/* Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE */ \
for (;;) \
{ \
if Z7_UNLIKELY(p >= lim) { return p; } \
a = (RISCV_LOAD_VAL(p) ^ 0x10u) + 1; \
if ((a & 0x77) == 0) break; \
a = (RISCV_LOAD_VAL(p + RISCV_INSTR_SIZE) ^ 0x10u) + 1; \
p += RISCV_INSTR_SIZE * 2; \
if ((a & 0x77) == 0) \
{ \
p -= RISCV_INSTR_SIZE; \
if Z7_UNLIKELY(p >= lim) { return p; } \
break; \
} \
}
// (xx6f ^ 10) + 1 = xx7f + 1 = xx80 : JAL
// (xxef ^ 10) + 1 = xxff + 1 = xx00 + 100 : JAL
// (xx17 ^ 10) + 1 = xx07 + 1 = xx08 : AUIPC
// (xx97 ^ 10) + 1 = xx87 + 1 = xx88 : AUIPC
Byte * Z7_BRANCH_CONV_ENC(RISCV)(Byte *p, SizeT size, UInt32 pc)
{
RISCV_SCAN_LOOP
v = a;
a = RISCV_GET_UI32(p);
#ifndef RISCV_USE_16BIT_LOAD
v += (UInt32)p[1] << 8;
#endif
if ((v & 8) == 0) // JAL
{
if ((v - (0x100 /* - RISCV_DELTA_7F */)) & 0xd80)
{
p += RISCV_INSTR_SIZE;
continue;
}
{
v = ((a & 1u << 31) >> 11)
| ((a & 0x3ff << 21) >> 20)
| ((a & 1 << 20) >> 9)
| (a & 0xff << 12);
BR_CONVERT_VAL_ENC(v)
// ((v & 1) == 0)
// v: bits [1 : 20] contain offset bits
#if 0 && defined(RISCV_USE_UNALIGNED_LOAD)
a &= 0xfff;
a |= ((UInt32)(v << 23))
| ((UInt32)(v << 7) & ((UInt32)0xff << 16))
| ((UInt32)(v >> 5) & ((UInt32)0xf0 << 8));
RISCV_SET_UI32(p, a)
#else // aligned
#if 0
SetUi16a(p, (UInt16)(((v >> 5) & 0xf000) | (a & 0xfff)))
#else
p[1] = (Byte)(((v >> 13) & 0xf0) | ((a >> 8) & 0xf));
#endif
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
v <<= 15;
v = Z7_BSWAP32(v);
SetUi16a(p + 2, (UInt16)v)
#else
p[2] = (Byte)(v >> 9);
p[3] = (Byte)(v >> 1);
#endif
#endif // aligned
}
p += 4;
continue;
} // JAL
{
// AUIPC
if (v & 0xe80) // (not x0) and (not x2)
{
const UInt32 b = RISCV_GET_UI32(p + 4);
if (RISCV_CHECK_1(v, b))
{
{
const UInt32 temp = (b << 12) | (0x17 + RISCV_REG_VAL);
RISCV_SET_UI32(p, temp)
}
a &= 0xfffff000;
{
#if 1
const int t = -1 >> 1;
if (t != -1)
a += (b >> 20) - ((b >> 19) & 0x1000); // arithmetic right shift emulation
else
#endif
a += (UInt32)((Int32)b >> 20); // arithmetic right shift (sign-extension).
}
BR_CONVERT_VAL_ENC(a)
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
a = Z7_BSWAP32(a);
RISCV_SET_UI32(p + 4, a)
#else
SetBe32(p + 4, a)
#endif
p += 8;
}
else
p += RISCV_STEP_1;
}
else
{
UInt32 r = a >> 27;
if (RISCV_CHECK_2(v, r))
{
v = RISCV_GET_UI32(p + 4);
r = (r << 7) + 0x17 + (v & 0xfffff000);
a = (a >> 12) | (v << 20);
RISCV_SET_UI32(p, r)
RISCV_SET_UI32(p + 4, a)
p += 8;
}
else
p += RISCV_STEP_2;
}
}
} // for
}
Byte * Z7_BRANCH_CONV_DEC(RISCV)(Byte *p, SizeT size, UInt32 pc)
{
RISCV_SCAN_LOOP
#ifdef RISCV_USE_16BIT_LOAD
if ((a & 8) == 0)
{
#else
v = a;
a += (UInt32)p[1] << 8;
if ((v & 8) == 0)
{
#endif
// JAL
a -= 0x100 - RISCV_DELTA_7F;
if (a & 0xd80)
{
p += RISCV_INSTR_SIZE;
continue;
}
{
const UInt32 a_old = (a + (0xef - RISCV_DELTA_7F)) & 0xfff;
#if 0 // unaligned
a = GetUi32(p);
v = (UInt32)(a >> 23) & ((UInt32)0xff << 1)
| (UInt32)(a >> 7) & ((UInt32)0xff << 9)
#elif 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
v = GetUi16a(p + 2);
v = Z7_BSWAP32(v) >> 15
#else
v = (UInt32)p[3] << 1
| (UInt32)p[2] << 9
#endif
| (UInt32)((a & 0xf000) << 5);
BR_CONVERT_VAL_DEC(v)
a = a_old
| (v << 11 & 1u << 31)
| (v << 20 & 0x3ff << 21)
| (v << 9 & 1 << 20)
| (v & 0xff << 12);
RISCV_SET_UI32(p, a)
}
p += 4;
continue;
} // JAL
{
// AUIPC
v = a;
#if 1 && defined(RISCV_USE_UNALIGNED_LOAD)
a = GetUi32(p);
#else
a |= (UInt32)GetUi16a(p + 2) << 16;
#endif
if ((v & 0xe80) == 0) // x0/x2
{
const UInt32 r = a >> 27;
if (RISCV_CHECK_2(v, r))
{
UInt32 b;
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
b = RISCV_GET_UI32(p + 4);
b = Z7_BSWAP32(b);
#else
b = GetBe32(p + 4);
#endif
v = a >> 12;
BR_CONVERT_VAL_DEC(b)
a = (r << 7) + 0x17;
a += (b + 0x800) & 0xfffff000;
v |= b << 20;
RISCV_SET_UI32(p, a)
RISCV_SET_UI32(p + 4, v)
p += 8;
}
else
p += RISCV_STEP_2;
}
else
{
const UInt32 b = RISCV_GET_UI32(p + 4);
if (!RISCV_CHECK_1(v, b))
p += RISCV_STEP_1;
else
{
v = (a & 0xfffff000) | (b >> 20);
a = (b << 12) | (0x17 + RISCV_REG_VAL);
RISCV_SET_UI32(p, a)
RISCV_SET_UI32(p + 4, v)
p += 8;
}
}
}
} // for
}
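
BR_CONVERT_VAL_ENC and BR_CONVERT_VAL_DEC near the top of this hunk are the essence of a BCJ branch filter: the encoder turns each stored relative displacement into an absolute target (so repeated calls to the same address produce identical bytes, which compress better), and the decoder reverses it. A minimal round-trip sketch of just that transformation, with hypothetical names and none of the RISC-V instruction parsing shown above:

#include <stdint.h>

/* Encoder direction: relative displacement -> absolute target
   (the diff's "v += BR_PC_GET"); pc is the instruction address. */
static uint32_t branch_target_encode(uint32_t rel, uint32_t pc)
{
  return rel + pc;
}

/* Decoder direction restores the original relative displacement
   (the diff's "v -= BR_PC_GET"). */
static uint32_t branch_target_decode(uint32_t abs_target, uint32_t pc)
{
  return abs_target - pc;
}

int main(void)
{
  uint32_t pc = 0x1000, rel = 0x0200;
  uint32_t enc = branch_target_encode(rel, pc);
  return branch_target_decode(enc, pc) == rel ? 0 : 1;  /* round trip */
}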
View File
@@ -1,5 +1,5 @@
/* CpuArch.c -- CPU specific code
2023-05-18 : Igor Pavlov : Public domain */
2024-07-04 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -226,7 +226,7 @@ void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!!
*/
static
Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(UInt32 subFunction, UInt32 func, int *CPUInfo)
Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int32 *CPUInfo)
{
UNUSED_VAR(subFunction)
__cpuid(CPUInfo, func);
@@ -242,13 +242,13 @@ Z7_NO_INLINE
#endif
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
MY_cpuidex((int *)p, (int)func, 0);
MY_cpuidex((Int32 *)p, (Int32)func, 0);
}
Z7_NO_INLINE
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
int a[4];
Int32 a[4];
MY_cpuidex(a, 0, 0);
return a[0];
}
@@ -384,7 +384,7 @@ BoolInt CPU_IsSupported_CMOV(void)
UInt32 a[4];
if (!x86cpuid_Func_1(&a[0]))
return 0;
return (a[3] >> 15) & 1;
return (BoolInt)(a[3] >> 15) & 1;
}
BoolInt CPU_IsSupported_SSE(void)
@@ -393,7 +393,7 @@ BoolInt CPU_IsSupported_SSE(void)
CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_Func_1(&a[0]))
return 0;
return (a[3] >> 25) & 1;
return (BoolInt)(a[3] >> 25) & 1;
}
BoolInt CPU_IsSupported_SSE2(void)
@@ -402,7 +402,7 @@ BoolInt CPU_IsSupported_SSE2(void)
CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_Func_1(&a[0]))
return 0;
return (a[3] >> 26) & 1;
return (BoolInt)(a[3] >> 26) & 1;
}
#endif
@@ -419,17 +419,17 @@ static UInt32 x86cpuid_Func_1_ECX(void)
BoolInt CPU_IsSupported_AES(void)
{
return (x86cpuid_Func_1_ECX() >> 25) & 1;
return (BoolInt)(x86cpuid_Func_1_ECX() >> 25) & 1;
}
BoolInt CPU_IsSupported_SSSE3(void)
{
return (x86cpuid_Func_1_ECX() >> 9) & 1;
return (BoolInt)(x86cpuid_Func_1_ECX() >> 9) & 1;
}
BoolInt CPU_IsSupported_SSE41(void)
{
return (x86cpuid_Func_1_ECX() >> 19) & 1;
return (BoolInt)(x86cpuid_Func_1_ECX() >> 19) & 1;
}
BoolInt CPU_IsSupported_SHA(void)
@@ -441,7 +441,7 @@ BoolInt CPU_IsSupported_SHA(void)
{
UInt32 d[4];
z7_x86_cpuid(d, 7);
return (d[1] >> 29) & 1;
return (BoolInt)(d[1] >> 29) & 1;
}
}
@@ -638,10 +638,10 @@ BoolInt CPU_IsSupported_AVX(void)
{
const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
// printf("\n=== XGetBV=%d\n", bm);
// printf("\n=== XGetBV=0x%x\n", bm);
return 1
& (bm >> 1) // SSE state is supported (set by OS) for storing/restoring
& (bm >> 2); // AVX state is supported (set by OS) for storing/restoring
& (BoolInt)(bm >> 1) // SSE state is supported (set by OS) for storing/restoring
& (BoolInt)(bm >> 2); // AVX state is supported (set by OS) for storing/restoring
}
// since Win7SP1: we can use GetEnabledXStateFeatures();
}
@@ -658,10 +658,39 @@ BoolInt CPU_IsSupported_AVX2(void)
z7_x86_cpuid(d, 7);
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
return 1
& (d[1] >> 5); // avx2
& (BoolInt)(d[1] >> 5); // avx2
}
}
#if 0
BoolInt CPU_IsSupported_AVX512F_AVX512VL(void)
{
if (!CPU_IsSupported_AVX())
return False;
if (z7_x86_cpuid_GetMaxFunc() < 7)
return False;
{
UInt32 d[4];
BoolInt v;
z7_x86_cpuid(d, 7);
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
v = 1
& (BoolInt)(d[1] >> 16) // avx512f
& (BoolInt)(d[1] >> 31); // avx512vl
if (!v)
return False;
}
{
const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
// printf("\n=== XGetBV=0x%x\n", bm);
return 1
& (BoolInt)(bm >> 5) // OPMASK
& (BoolInt)(bm >> 6) // ZMM upper 256-bit
& (BoolInt)(bm >> 7); // ZMM16 ... ZMM31
}
}
#endif
BoolInt CPU_IsSupported_VAES_AVX2(void)
{
if (!CPU_IsSupported_AVX())
@@ -673,9 +702,9 @@ BoolInt CPU_IsSupported_VAES_AVX2(void)
z7_x86_cpuid(d, 7);
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
return 1
& (d[1] >> 5) // avx2
& (BoolInt)(d[1] >> 5) // avx2
// & (d[1] >> 31) // avx512vl
& (d[2] >> 9); // vaes // VEX-256/EVEX
& (BoolInt)(d[2] >> 9); // vaes // VEX-256/EVEX
}
}
@@ -688,7 +717,7 @@ BoolInt CPU_IsSupported_PageGB(void)
if (d[0] < 0x80000001)
return False;
z7_x86_cpuid(d, 0x80000001);
return (d[3] >> 26) & 1;
return (BoolInt)(d[3] >> 26) & 1;
}
}
@@ -760,33 +789,70 @@ BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
#else // __APPLE__
#include <sys/auxv.h>
#if defined(__GLIBC__) && (__GLIBC__ * 100 + __GLIBC_MINOR__ >= 216)
#define Z7_GETAUXV_AVAILABLE
#else
// #pragma message("=== is not NEW GLIBC === ")
#if defined __has_include
#if __has_include (<sys/auxv.h>)
// #pragma message("=== sys/auxv.h is avail=== ")
#define Z7_GETAUXV_AVAILABLE
#endif
#endif
#endif
#ifdef Z7_GETAUXV_AVAILABLE
// #pragma message("=== Z7_GETAUXV_AVAILABLE === ")
#include <sys/auxv.h>
#define USE_HWCAP
#endif
#ifdef USE_HWCAP
#if defined(__FreeBSD__)
static unsigned long MY_getauxval(int aux)
{
unsigned long val;
if (elf_aux_info(aux, &val, sizeof(val)))
return 0;
return val;
}
#else
#define MY_getauxval getauxval
#if defined __has_include
#if __has_include (<asm/hwcap.h>)
#include <asm/hwcap.h>
#endif
#endif
#endif
#define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; }
BoolInt CPU_IsSupported_ ## name1(void) { return (MY_getauxval(AT_HWCAP) & (HWCAP_ ## name2)); }
#ifdef MY_CPU_ARM64
#define MY_HWCAP_CHECK_FUNC(name) \
MY_HWCAP_CHECK_FUNC_2(name, name)
#if 1 || defined(__ARM_NEON)
BoolInt CPU_IsSupported_NEON(void) { return True; }
#else
MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
#endif
// MY_HWCAP_CHECK_FUNC (ASIMD)
#elif defined(MY_CPU_ARM)
#define MY_HWCAP_CHECK_FUNC(name) \
BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
BoolInt CPU_IsSupported_ ## name(void) { return (MY_getauxval(AT_HWCAP2) & (HWCAP2_ ## name)); }
MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
#endif
#else // USE_HWCAP
#define MY_HWCAP_CHECK_FUNC(name) \
BoolInt CPU_IsSupported_ ## name() { return 0; }
BoolInt CPU_IsSupported_ ## name(void) { return 0; }
#if defined(__ARM_NEON)
BoolInt CPU_IsSupported_NEON(void) { return True; }
#else
MY_HWCAP_CHECK_FUNC(NEON)
#endif
#endif // USE_HWCAP
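
The block above routes every query through MY_getauxval so one macro body covers both glibc's getauxval() and FreeBSD's elf_aux_info(), and includes <asm/hwcap.h> when available for the HWCAP_* bit names. A standalone Linux-only sketch of the same probe, with a hypothetical helper name and the FreeBSD branch omitted:

#include <sys/auxv.h>
#if defined(__arm__) || defined(__aarch64__)
#include <asm/hwcap.h>
#endif

/* Returns non-zero when the kernel reports Advanced SIMD / NEON support.
   Hypothetical helper, ARM Linux only; other targets simply report 0 here. */
static int probe_neon(void)
{
#if defined(__aarch64__)
  return (getauxval(AT_HWCAP) & HWCAP_ASIMD) != 0;  /* AArch64 bit name */
#elif defined(__arm__)
  return (getauxval(AT_HWCAP) & HWCAP_NEON) != 0;   /* 32-bit ARM bit name */
#else
  return 0;
#endif
}

int main(void)
{
  return probe_neon() ? 0 : 1;
}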
View File
@@ -1,5 +1,5 @@
/* DllSecur.c -- DLL loading security
2023-04-02 : Igor Pavlov : Public domain */
2023-12-03 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -11,19 +11,7 @@
#ifndef UNDER_CE
#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__)
// #pragma GCC diagnostic ignored "-Wcast-function-type"
#endif
#if defined(__clang__) || defined(__GNUC__)
typedef void (*Z7_voidFunction)(void);
#define MY_CAST_FUNC (Z7_voidFunction)
#elif defined(_MSC_VER) && _MSC_VER > 1920
#define MY_CAST_FUNC (void *)
// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()'
#else
#define MY_CAST_FUNC
#endif
Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
typedef BOOL (WINAPI *Func_SetDefaultDllDirectories)(DWORD DirectoryFlags);
@@ -61,7 +49,7 @@ static const char * const g_Dlls =
if ((UInt16)GetVersion() != 6) { \
const \
Func_SetDefaultDllDirectories setDllDirs = \
(Func_SetDefaultDllDirectories) MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), \
(Func_SetDefaultDllDirectories) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), \
"SetDefaultDllDirectories"); \
if (setDllDirs) if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS)) return; }
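
The removed MY_CAST_FUNC block shows the trick that the new Z7_CAST_FUNC_C macro presumably keeps: cast the FARPROC returned by GetProcAddress through a generic void-function type first, so newer GCC does not flag the conversion with -Wcast-function-type. A hedged Windows-only sketch of that lookup, illustrative names only:

#include <windows.h>

typedef BOOL (WINAPI *Func_SetDefaultDllDirectories)(DWORD flags);
typedef void (*GenericFunc)(void);   /* same role as the removed Z7_voidFunction */

/* Look up the API at run time; the intermediate GenericFunc cast is what
   keeps the cast-function-type warning quiet on newer GCC. */
static Func_SetDefaultDllDirectories lookup_SetDefaultDllDirectories(void)
{
  return (Func_SetDefaultDllDirectories)(GenericFunc)
      GetProcAddress(GetModuleHandleW(L"kernel32.dll"), "SetDefaultDllDirectories");
}

int main(void)
{
  Func_SetDefaultDllDirectories f = lookup_SetDefaultDllDirectories();
  return f ? 0 : 1;   /* found or not; the function is not called here */
}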
View File
@@ -1,5 +1,5 @@
/* LzFind.c -- Match finder for LZ algorithms
2023-03-14 : Igor Pavlov : Public domain */
2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -108,9 +108,15 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all
return (p->bufBase != NULL);
}
static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
static const Byte *MatchFinder_GetPointerToCurrentPos(void *p)
{
return ((CMatchFinder *)p)->buffer;
}
static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); }
static UInt32 MatchFinder_GetNumAvailableBytes(void *p)
{
return GET_AVAIL_BYTES((CMatchFinder *)p);
}
Z7_NO_INLINE
@@ -571,8 +577,9 @@ void MatchFinder_Init_4(CMatchFinder *p)
#define CYC_TO_POS_OFFSET 0
// #define CYC_TO_POS_OFFSET 1 // for debug
void MatchFinder_Init(CMatchFinder *p)
void MatchFinder_Init(void *_p)
{
CMatchFinder *p = (CMatchFinder *)_p;
MatchFinder_Init_HighHash(p);
MatchFinder_Init_LowHash(p);
MatchFinder_Init_4(p);
@@ -607,16 +614,16 @@ void MatchFinder_Init(CMatchFinder *p)
#endif
#endif
// #elif defined(MY_CPU_ARM_OR_ARM64)
#elif defined(MY_CPU_ARM64)
#elif defined(MY_CPU_ARM64) \
/* || (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) */
#if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 8)
#if defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|| defined(__GNUC__) && (__GNUC__ >= 6)
#define USE_LZFIND_SATUR_SUB_128
#ifdef MY_CPU_ARM64
// #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("")))
#else
// #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
#define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=neon")))
#endif
#elif defined(_MSC_VER)
@@ -625,7 +632,7 @@ void MatchFinder_Init(CMatchFinder *p)
#endif
#endif
#if defined(_MSC_VER) && !defined(__clang__) && defined(MY_CPU_ARM64)
#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
@@ -1082,9 +1089,11 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
#define MOVE_POS \
++p->cyclicBufferPos; \
p->cyclicBufferPos++; \
p->buffer++; \
{ const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); }
{ const UInt32 pos1 = p->pos + 1; \
p->pos = pos1; \
if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); }
#define MOVE_POS_RET MOVE_POS return distances;
@@ -1103,20 +1112,26 @@ static void MatchFinder_MovePos(CMatchFinder *p)
}
#define GET_MATCHES_HEADER2(minLen, ret_op) \
unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
UInt32 hv; const Byte *cur; UInt32 curMatch; \
UInt32 lenLimit = p->lenLimit; \
if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; } \
cur = p->buffer;
#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances)
#define SKIP_HEADER(minLen) do { GET_MATCHES_HEADER2(minLen, continue)
#define SKIP_HEADER(minLen) \
do { GET_MATCHES_HEADER2(minLen, continue)
#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, \
p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS } while (--num);
#define SKIP_FOOTER \
SkipMatchesSpec(MF_PARAMS(p)); \
MOVE_POS \
} while (--num);
#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
distances = func(MF_PARAMS(p), \
distances, (UInt32)_maxLen_); MOVE_POS_RET
distances = func(MF_PARAMS(p), distances, (UInt32)_maxLen_); \
MOVE_POS_RET
#define GET_MATCHES_FOOTER_BT(_maxLen_) \
GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)
@@ -1133,8 +1148,9 @@ static void MatchFinder_MovePos(CMatchFinder *p)
for (; c != lim; c++) if (*(c + diff) != *c) break; \
maxLen = (unsigned)(c - cur); }
static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
static UInt32* Bt2_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{
CMatchFinder *p = (CMatchFinder *)_p;
GET_MATCHES_HEADER(2)
HASH2_CALC
curMatch = p->hash[hv];
@@ -1158,8 +1174,9 @@ UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
mmm = pos;
static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
static UInt32* Bt3_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{
CMatchFinder *p = (CMatchFinder *)_p;
UInt32 mmm;
UInt32 h2, d2, pos;
unsigned maxLen;
@@ -1199,8 +1216,9 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
}
static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
static UInt32* Bt4_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{
CMatchFinder *p = (CMatchFinder *)_p;
UInt32 mmm;
UInt32 h2, h3, d2, d3, pos;
unsigned maxLen;
@@ -1267,10 +1285,12 @@ static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
}
static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
static UInt32* Bt5_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{
CMatchFinder *p = (CMatchFinder *)_p;
UInt32 mmm;
UInt32 h2, h3, d2, d3, maxLen, pos;
UInt32 h2, h3, d2, d3, pos;
unsigned maxLen;
UInt32 *hash;
GET_MATCHES_HEADER(5)
@@ -1339,8 +1359,9 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
}
static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
static UInt32* Hc4_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{
CMatchFinder *p = (CMatchFinder *)_p;
UInt32 mmm;
UInt32 h2, h3, d2, d3, pos;
unsigned maxLen;
@@ -1407,10 +1428,12 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
}
static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
static UInt32 * Hc5_MatchFinder_GetMatches(void *_p, UInt32 *distances)
{
CMatchFinder *p = (CMatchFinder *)_p;
UInt32 mmm;
UInt32 h2, h3, d2, d3, maxLen, pos;
UInt32 h2, h3, d2, d3, pos;
unsigned maxLen;
UInt32 *hash;
GET_MATCHES_HEADER(5)
@@ -1466,7 +1489,7 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
if (*(cur - d2 + 3) != cur[3])
break;
UPDATE_maxLen
distances[-2] = maxLen;
distances[-2] = (UInt32)maxLen;
if (maxLen == lenLimit)
{
p->son[p->cyclicBufferPos] = curMatch;
@@ -1489,8 +1512,9 @@ UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
}
static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
static void Bt2_MatchFinder_Skip(void *_p, UInt32 num)
{
CMatchFinder *p = (CMatchFinder *)_p;
SKIP_HEADER(2)
{
HASH2_CALC
@@ -1511,8 +1535,9 @@ void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
SKIP_FOOTER
}
static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
static void Bt3_MatchFinder_Skip(void *_p, UInt32 num)
{
CMatchFinder *p = (CMatchFinder *)_p;
SKIP_HEADER(3)
{
UInt32 h2;
@@ -1526,8 +1551,9 @@ static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
SKIP_FOOTER
}
static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
static void Bt4_MatchFinder_Skip(void *_p, UInt32 num)
{
CMatchFinder *p = (CMatchFinder *)_p;
SKIP_HEADER(4)
{
UInt32 h2, h3;
@@ -1542,8 +1568,9 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
SKIP_FOOTER
}
static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
static void Bt5_MatchFinder_Skip(void *_p, UInt32 num)
{
CMatchFinder *p = (CMatchFinder *)_p;
SKIP_HEADER(5)
{
UInt32 h2, h3;
@@ -1589,8 +1616,9 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
}} while(num); \
static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
static void Hc4_MatchFinder_Skip(void *_p, UInt32 num)
{
CMatchFinder *p = (CMatchFinder *)_p;
HC_SKIP_HEADER(4)
UInt32 h2, h3;
@@ -1604,8 +1632,9 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
}
static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
static void Hc5_MatchFinder_Skip(void *_p, UInt32 num)
{
CMatchFinder *p = (CMatchFinder *)_p;
HC_SKIP_HEADER(5)
UInt32 h2, h3;
@@ -1634,41 +1663,41 @@ void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
{
vTable->Init = (Mf_Init_Func)MatchFinder_Init;
vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
vTable->Init = MatchFinder_Init;
vTable->GetNumAvailableBytes = MatchFinder_GetNumAvailableBytes;
vTable->GetPointerToCurrentPos = MatchFinder_GetPointerToCurrentPos;
if (!p->btMode)
{
if (p->numHashBytes <= 4)
{
vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
vTable->GetMatches = Hc4_MatchFinder_GetMatches;
vTable->Skip = Hc4_MatchFinder_Skip;
}
else
{
vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip;
vTable->GetMatches = Hc5_MatchFinder_GetMatches;
vTable->Skip = Hc5_MatchFinder_Skip;
}
}
else if (p->numHashBytes == 2)
{
vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
vTable->GetMatches = Bt2_MatchFinder_GetMatches;
vTable->Skip = Bt2_MatchFinder_Skip;
}
else if (p->numHashBytes == 3)
{
vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
vTable->GetMatches = Bt3_MatchFinder_GetMatches;
vTable->Skip = Bt3_MatchFinder_Skip;
}
else if (p->numHashBytes == 4)
{
vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
vTable->GetMatches = Bt4_MatchFinder_GetMatches;
vTable->Skip = Bt4_MatchFinder_Skip;
}
else
{
vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches;
vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip;
vTable->GetMatches = Bt5_MatchFinder_GetMatches;
vTable->Skip = Bt5_MatchFinder_Skip;
}
}
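
The vtable wiring above can drop every (Mf_*_Func) cast because the callbacks now take void * and cast back to CMatchFinder * in their own bodies; calling a function through a pointer of an incompatible type is undefined behaviour in C, so moving the cast into the callee is the tidy fix. A tiny sketch of that pattern, hypothetical names only:

#include <stdio.h>

typedef struct { int pos; } State;

/* The callback type erases the concrete state type. */
typedef void (*Init_Func)(void *p);

static void State_Init(void *_p)
{
  State *p = (State *)_p;   /* cast happens inside the implementation */
  p->pos = 0;
}

typedef struct { Init_Func Init; } VTable;

int main(void)
{
  State s; VTable vt;
  vt.Init = State_Init;     /* no function-pointer cast needed at the call site */
  vt.Init(&s);
  printf("%d\n", s.pos);
  return 0;
}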
View File
@@ -1,5 +1,5 @@
/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
2023-04-02 : Igor Pavlov : Public domain */
2024-01-22 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -94,7 +94,7 @@ static void MtSync_Construct(CMtSync *p)
}
#define DEBUG_BUFFER_LOCK // define it to debug lock state
// #define DEBUG_BUFFER_LOCK // define it to debug lock state
#ifdef DEBUG_BUFFER_LOCK
#include <stdlib.h>
@@ -877,8 +877,9 @@ SRes MatchFinderMt_InitMt(CMatchFinderMt *p)
}
static void MatchFinderMt_Init(CMatchFinderMt *p)
static void MatchFinderMt_Init(void *_p)
{
CMatchFinderMt *p = (CMatchFinderMt *)_p;
CMatchFinder *mf = MF(p);
p->btBufPos =
@@ -981,8 +982,9 @@ static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
static const Byte * MatchFinderMt_GetPointerToCurrentPos(void *_p)
{
CMatchFinderMt *p = (CMatchFinderMt *)_p;
return p->pointerToCurPos;
}
@@ -990,8 +992,9 @@ static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);
static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
static UInt32 MatchFinderMt_GetNumAvailableBytes(void *_p)
{
CMatchFinderMt *p = (CMatchFinderMt *)_p;
if (p->btBufPos != p->btBufPosLimit)
return p->btNumAvailBytes;
return MatchFinderMt_GetNextBlock_Bt(p);
@@ -1243,8 +1246,9 @@ static UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
}
static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
static UInt32 * MatchFinderMt2_GetMatches(void *_p, UInt32 *d)
{
CMatchFinderMt *p = (CMatchFinderMt *)_p;
const UInt32 *bt = p->btBufPos;
const UInt32 len = *bt++;
const UInt32 *btLim = bt + len;
@@ -1267,8 +1271,9 @@ static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
static UInt32 * MatchFinderMt_GetMatches(void *_p, UInt32 *d)
{
CMatchFinderMt *p = (CMatchFinderMt *)_p;
const UInt32 *bt = p->btBufPos;
UInt32 len = *bt++;
const UInt32 avail = p->btNumAvailBytes - 1;
@@ -1315,14 +1320,16 @@ static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0);
static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)
static void MatchFinderMt0_Skip(void *_p, UInt32 num)
{
CMatchFinderMt *p = (CMatchFinderMt *)_p;
SKIP_HEADER2_MT { p->btNumAvailBytes--;
SKIP_FOOTER_MT
}
static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
static void MatchFinderMt2_Skip(void *_p, UInt32 num)
{
CMatchFinderMt *p = (CMatchFinderMt *)_p;
SKIP_HEADER_MT(2)
UInt32 h2;
MT_HASH2_CALC
@@ -1330,8 +1337,9 @@ static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
SKIP_FOOTER_MT
}
static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
static void MatchFinderMt3_Skip(void *_p, UInt32 num)
{
CMatchFinderMt *p = (CMatchFinderMt *)_p;
SKIP_HEADER_MT(3)
UInt32 h2, h3;
MT_HASH3_CALC
@@ -1361,39 +1369,39 @@ static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)
{
vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;
vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;
vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;
vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;
vTable->Init = MatchFinderMt_Init;
vTable->GetNumAvailableBytes = MatchFinderMt_GetNumAvailableBytes;
vTable->GetPointerToCurrentPos = MatchFinderMt_GetPointerToCurrentPos;
vTable->GetMatches = MatchFinderMt_GetMatches;
switch (MF(p)->numHashBytes)
{
case 2:
p->GetHeadsFunc = GetHeads2;
p->MixMatchesFunc = (Mf_Mix_Matches)NULL;
vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip;
vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
p->MixMatchesFunc = NULL;
vTable->Skip = MatchFinderMt0_Skip;
vTable->GetMatches = MatchFinderMt2_GetMatches;
break;
case 3:
p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3;
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
p->MixMatchesFunc = MixMatches2;
vTable->Skip = MatchFinderMt2_Skip;
break;
case 4:
p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4;
// it's fast inline version of GetMatches()
// vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4;
// vTable->GetMatches = MatchFinderMt_GetMatches_Bt4;
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
p->MixMatchesFunc = MixMatches3;
vTable->Skip = MatchFinderMt3_Skip;
break;
default:
p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5;
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
p->MixMatchesFunc = MixMatches4;
vTable->Skip =
(Mf_Skip_Func)MatchFinderMt3_Skip;
// (Mf_Skip_Func)MatchFinderMt4_Skip;
MatchFinderMt3_Skip;
// MatchFinderMt4_Skip;
break;
}
}
View File
@@ -1,5 +1,5 @@
/* Lzma2Dec.c -- LZMA2 Decoder
2023-03-03 : Igor Pavlov : Public domain */
2024-03-01 : Igor Pavlov : Public domain */
/* #define SHOW_DEBUG_INFO */
@@ -157,8 +157,10 @@ static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
p->decoder.prop.lp = (Byte)lp;
return LZMA2_STATE_DATA;
}
default:
return LZMA2_STATE_ERROR;
}
return LZMA2_STATE_ERROR;
}
static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size)
View File
@@ -1,5 +1,5 @@
/* LzmaEnc.c -- LZMA Encoder
2023-04-13: Igor Pavlov : Public domain */
2024-01-24: Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -195,11 +195,11 @@ unsigned GetPosSlot1(UInt32 pos);
unsigned GetPosSlot1(UInt32 pos)
{
unsigned res;
BSR2_RET(pos, res);
BSR2_RET(pos, res)
return res;
}
#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
#define GetPosSlot2(pos, res) { BSR2_RET(pos, res) }
#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res) }
#else // ! LZMA_LOG_BSR
@@ -512,7 +512,7 @@ struct CLzmaEnc
COPY_ARR(d, s, posEncoders) \
(d)->lenProbs = (s)->lenProbs; \
(d)->repLenProbs = (s)->repLenProbs; \
memcpy((d)->litProbs, (s)->litProbs, ((UInt32)0x300 << (p)->lclp) * sizeof(CLzmaProb));
memcpy((d)->litProbs, (s)->litProbs, ((size_t)0x300 * sizeof(CLzmaProb)) << (p)->lclp);
void LzmaEnc_SaveState(CLzmaEncHandle p)
{
@@ -1040,14 +1040,14 @@ Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables(
UInt32 price = b;
do
{
unsigned bit = sym & 1;
const unsigned bit = sym & 1;
sym >>= 1;
price += GET_PRICEa(probs[sym], bit);
}
while (sym >= 2);
{
unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))];
const unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))];
prices[(size_t)i * 2 ] = price + GET_PRICEa_0(prob);
prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob);
}
@@ -1056,7 +1056,7 @@ Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables(
{
unsigned posState;
size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]);
const size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]);
for (posState = 1; posState < numPosStates; posState++)
memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num);
}
@@ -2696,12 +2696,12 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc,
#endif
{
unsigned lclp = p->lc + p->lp;
const unsigned lclp = p->lc + p->lp;
if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp)
{
LzmaEnc_FreeLits(p, alloc);
p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp);
p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((size_t)0x300 * sizeof(CLzmaProb)) << lclp);
if (!p->litProbs || !p->saveState.litProbs)
{
LzmaEnc_FreeLits(p, alloc);
@@ -2802,8 +2802,8 @@ static void LzmaEnc_Init(CLzmaEnc *p)
}
{
UInt32 num = (UInt32)0x300 << (p->lp + p->lc);
UInt32 k;
const size_t num = (size_t)0x300 << (p->lp + p->lc);
size_t k;
CLzmaProb *probs = p->litProbs;
for (k = 0; k < num; k++)
probs[k] = kProbInitValue;
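
The litProbs allocations above now compute the size as ((size_t)0x300 * sizeof(CLzmaProb)) << lclp, keeping the whole expression in size_t instead of shifting a UInt32 first. With LZMA's usual lc/lp limits the old form did not overflow in practice, but the general hazard the new form sidesteps looks like this (hypothetical, exaggerated shift count):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
  const unsigned shift = 24;  /* hypothetical; far larger than lc+lp ever gets */
  /* 32-bit intermediate: 0x300u << 24 wraps to 0 before the multiply */
  uint32_t narrow = (uint32_t)((0x300u << shift) * sizeof(uint16_t));
  /* widen to size_t first, then shift: no 32-bit intermediate (on an LP64 build) */
  size_t wide = ((size_t)0x300 * sizeof(uint16_t)) << shift;
  printf("narrow = %u, wide = %zu\n", (unsigned)narrow, wide);
  return 0;
}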
View File
@@ -1,5 +1,5 @@
/* MtCoder.c -- Multi-thread Coder
2023-04-13 : Igor Pavlov : Public domain */
2023-09-07 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -430,7 +430,7 @@ SRes MtCoder_Code(CMtCoder *p)
SRes res = SZ_OK;
if (numThreads > MTCODER_THREADS_MAX)
numThreads = MTCODER_THREADS_MAX;
numThreads = MTCODER_THREADS_MAX;
numBlocksMax = MTCODER_GET_NUM_BLOCKS_FROM_THREADS(numThreads);
if (p->blockSize < ((UInt32)1 << 26)) numBlocksMax++;
@@ -438,7 +438,7 @@ SRes MtCoder_Code(CMtCoder *p)
if (p->blockSize < ((UInt32)1 << 22)) numBlocksMax++;
if (numBlocksMax > MTCODER_BLOCKS_MAX)
numBlocksMax = MTCODER_BLOCKS_MAX;
numBlocksMax = MTCODER_BLOCKS_MAX;
if (p->blockSize != p->allocatedBufsSize)
{
@@ -469,7 +469,7 @@ SRes MtCoder_Code(CMtCoder *p)
{
RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->readEvent))
RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, numBlocksMax, numBlocksMax))
RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, (UInt32)numBlocksMax, (UInt32)numBlocksMax))
}
for (i = 0; i < MTCODER_BLOCKS_MAX - 1; i++)
View File
@@ -1,5 +1,5 @@
/* MtDec.c -- Multi-thread Decoder
2023-04-02 : Igor Pavlov : Public domain */
2024-02-20 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -809,6 +809,16 @@ static WRes MtDec_ThreadFunc2(CMtDecThread *t)
#endif
typedef
#ifdef _WIN32
UINT_PTR
#elif 1
uintptr_t
#else
ptrdiff_t
#endif
MY_uintptr_t;
static THREAD_FUNC_DECL MtDec_ThreadFunc1(void *pp)
{
WRes res;
@@ -821,7 +831,7 @@ static THREAD_FUNC_DECL MtDec_ThreadFunc1(void *pp)
res = MtDec_ThreadFunc2(t);
p = t->mtDec;
if (res == 0)
return (THREAD_FUNC_RET_TYPE)(UINT_PTR)p->exitThreadWRes;
return (THREAD_FUNC_RET_TYPE)(MY_uintptr_t)p->exitThreadWRes;
{
// it's unexpected situation for some threading function error
if (p->exitThreadWRes == 0)
@@ -832,7 +842,7 @@ static THREAD_FUNC_DECL MtDec_ThreadFunc1(void *pp)
Event_Set(&p->threads[0].canWrite);
MtProgress_SetError(&p->mtProgress, MY_SRes_HRESULT_FROM_WRes(res));
}
return (THREAD_FUNC_RET_TYPE)(UINT_PTR)res;
return (THREAD_FUNC_RET_TYPE)(MY_uintptr_t)res;
}
static Z7_NO_INLINE THREAD_FUNC_DECL MtDec_ThreadFunc(void *pp)
@@ -1072,7 +1082,7 @@ SRes MtDec_Code(CMtDec *p)
if (wres == 0) { wres = Event_Set(&nextThread->canWrite);
if (wres == 0) { wres = Event_Set(&nextThread->canRead);
if (wres == 0) { THREAD_FUNC_RET_TYPE res = MtDec_ThreadFunc(nextThread);
wres = (WRes)(UINT_PTR)res;
wres = (WRes)(MY_uintptr_t)res;
if (wres != 0)
{
p->needContinue = False;
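
MY_uintptr_t above exists so a WRes can be round-tripped through the pointer-sized thread return value on both Windows (UINT_PTR) and POSIX (uintptr_t). A minimal POSIX-only sketch of the same round trip, hypothetical names:

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static void *worker(void *arg)
{
  (void)arg;
  int status = 42;                    /* pretend result code */
  return (void *)(uintptr_t)status;   /* widen through uintptr_t, as MY_uintptr_t does */
}

int main(void)
{
  pthread_t t;
  void *ret;
  pthread_create(&t, NULL, worker, NULL);
  pthread_join(t, &ret);
  printf("worker returned %d\n", (int)(uintptr_t)ret);
  return 0;
}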
View File
@@ -1,5 +1,5 @@
/* Ppmd7.c -- PPMdH codec
2023-04-02 : Igor Pavlov : Public domain
2023-09-07 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#include "Precomp.h"
@@ -302,8 +302,17 @@ static void *Ppmd7_AllocUnits(CPpmd7 *p, unsigned indx)
#define MEM_12_CPY(dest, src, num) \
{ UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \
do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); }
{ UInt32 *d = (UInt32 *)(dest); \
const UInt32 *z = (const UInt32 *)(src); \
unsigned n = (num); \
do { \
d[0] = z[0]; \
d[1] = z[1]; \
d[2] = z[2]; \
z += 3; \
d += 3; \
} while (--n); \
}
/*
@@ -711,8 +720,8 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
if ((ns1 & 1) == 0)
{
/* Expand for one UNIT */
unsigned oldNU = ns1 >> 1;
unsigned i = U2I(oldNU);
const unsigned oldNU = ns1 >> 1;
const unsigned i = U2I(oldNU);
if (i != U2I((size_t)oldNU + 1))
{
void *ptr = Ppmd7_AllocUnits(p, i + 1);
@@ -731,7 +740,7 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
sum = c->Union2.SummFreq;
/* max increase of Escape_Freq is 3 here.
total increase of Union2.SummFreq for all symbols is less than 256 here */
sum += (UInt32)(2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1));
sum += (UInt32)(unsigned)((2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1)));
/* original PPMdH uses 16-bit variable for (sum) here.
But (sum < 0x9000). So we don't truncate (sum) to 16-bit */
// sum = (UInt16)sum;
@@ -761,7 +770,7 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
// (max(s->freq) == 120), when we convert from 1-symbol into 2-symbol context
s->Freq = (Byte)freq;
// max(InitEsc = PPMD7_kExpEscape[*]) is 25. So the max(escapeFreq) is 26 here
sum = freq + p->InitEsc + (ns > 3);
sum = (UInt32)(freq + p->InitEsc + (ns > 3));
}
}
@@ -933,10 +942,10 @@ CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq)
p->HiBitsFlag;
{
// if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ
unsigned summ = (UInt16)see->Summ; // & 0xFFFF
unsigned r = (summ >> see->Shift);
const unsigned summ = (UInt16)see->Summ; // & 0xFFFF
const unsigned r = (summ >> see->Shift);
see->Summ = (UInt16)(summ - r);
*escFreq = r + (r == 0);
*escFreq = (UInt32)(r + (r == 0));
}
}
else
@@ -981,9 +990,9 @@ void Ppmd7_Update1_0(CPpmd7 *p)
CPpmd_State *s = p->FoundState;
CPpmd7_Context *mc = p->MinContext;
unsigned freq = s->Freq;
unsigned summFreq = mc->Union2.SummFreq;
const unsigned summFreq = mc->Union2.SummFreq;
p->PrevSuccess = (2 * freq > summFreq);
p->RunLength += (int)p->PrevSuccess;
p->RunLength += (Int32)p->PrevSuccess;
mc->Union2.SummFreq = (UInt16)(summFreq + 4);
freq += 4;
s->Freq = (Byte)freq;
View File
@@ -1,5 +1,5 @@
/* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder
2023-04-02 : Igor Pavlov : Public domain
2023-09-07 : Igor Pavlov : Public domain
This code is based on:
PPMd var.H (2001): Dmitry Shkarin : Public domain */
@@ -58,7 +58,7 @@ static void Ppmd7z_RD_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
void Ppmd7_UpdateModel(CPpmd7 *p);
#define MASK(sym) ((unsigned char *)charMask)[sym]
#define MASK(sym) ((Byte *)charMask)[sym]
// Z7_FORCE_INLINE
// static
int Ppmd7z_DecodeSymbol(CPpmd7 *p)
@@ -120,8 +120,8 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
MASK(s->Symbol) = 0;
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
const unsigned sym0 = s2[0].Symbol;
const unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
@@ -209,17 +209,17 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
unsigned num2 = num / 2;
num &= 1;
hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
hiCnt = (s->Freq & (UInt32)(MASK(s->Symbol))) & (0 - (UInt32)num);
s += num;
p->MinContext = mc;
do
{
unsigned sym0 = s[0].Symbol;
unsigned sym1 = s[1].Symbol;
const unsigned sym0 = s[0].Symbol;
const unsigned sym1 = s[1].Symbol;
s += 2;
hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
hiCnt += (s[-2].Freq & (UInt32)(MASK(sym0)));
hiCnt += (s[-1].Freq & (UInt32)(MASK(sym1)));
}
while (--num2);
}
@@ -238,13 +238,13 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
s = Ppmd7_GetStats(p, p->MinContext);
hiCnt = count;
// count -= s->Freq & (unsigned)(MASK(s->Symbol));
// count -= s->Freq & (UInt32)(MASK(s->Symbol));
// if ((Int32)count >= 0)
{
for (;;)
{
count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
// count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
// count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
}
}
s--;
View File
@@ -1,5 +1,5 @@
/* Ppmd7Enc.c -- Ppmd7z (PPMdH with 7z Range Coder) Encoder
2023-04-02 : Igor Pavlov : Public domain
2023-09-07 : Igor Pavlov : Public domain
This code is based on:
PPMd var.H (2001): Dmitry Shkarin : Public domain */
@@ -82,7 +82,7 @@ void Ppmd7z_Flush_RangeEnc(CPpmd7 *p)
void Ppmd7_UpdateModel(CPpmd7 *p);
#define MASK(sym) ((unsigned char *)charMask)[sym]
#define MASK(sym) ((Byte *)charMask)[sym]
Z7_FORCE_INLINE
static
@@ -139,8 +139,8 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
MASK(s->Symbol) = 0;
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
const unsigned sym0 = s2[0].Symbol;
const unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
@@ -265,16 +265,15 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
if (num2 != 0)
{
s += i;
for (;;)
do
{
unsigned sym0 = s[0].Symbol;
unsigned sym1 = s[1].Symbol;
const unsigned sym0 = s[0].Symbol;
const unsigned sym1 = s[1].Symbol;
s += 2;
sum += (s[-2].Freq & (unsigned)(MASK(sym0)));
sum += (s[-1].Freq & (unsigned)(MASK(sym1)));
if (--num2 == 0)
break;
}
while (--num2);
}
View File
@@ -1,5 +1,5 @@
/* Sha256.c -- SHA-256 Hash
2023-04-02 : Igor Pavlov : Public domain
2024-03-01 : Igor Pavlov : Public domain
This code is based on public domain code from Wei Dai's Crypto++ library. */
#include "Precomp.h"
@@ -15,35 +15,35 @@ This code is based on public domain code from Wei Dai's Crypto++ library. */
#endif
#ifdef MY_CPU_X86_OR_AMD64
#ifdef _MSC_VER
#if _MSC_VER >= 1200
#if defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \
|| defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \
|| defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) \
|| defined(_MSC_VER) && (_MSC_VER >= 1200)
#define Z7_COMPILER_SHA256_SUPPORTED
#endif
#elif defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define Z7_COMPILER_SHA256_SUPPORTED
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 8) // fix that check
#define Z7_COMPILER_SHA256_SUPPORTED
#endif
#elif defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1800) // fix that check
#define Z7_COMPILER_SHA256_SUPPORTED
#endif
#endif
#elif defined(MY_CPU_ARM_OR_ARM64)
#ifdef _MSC_VER
#if _MSC_VER >= 1910
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
#if defined(__ARM_FEATURE_SHA2) \
|| defined(__ARM_FEATURE_CRYPTO)
#define Z7_COMPILER_SHA256_SUPPORTED
#else
#if defined(MY_CPU_ARM64) \
|| defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
|| defined(Z7_MSC_VER_ORIGINAL)
#if defined(__ARM_FP) && \
( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|| defined(__GNUC__) && (__GNUC__ >= 6) \
) \
|| defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
#if defined(MY_CPU_ARM64) \
|| !defined(Z7_CLANG_VERSION) \
|| defined(__ARM_NEON) && \
(Z7_CLANG_VERSION < 170000 || \
Z7_CLANG_VERSION > 170001)
#define Z7_COMPILER_SHA256_SUPPORTED
#endif
#elif defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define Z7_COMPILER_SHA256_SUPPORTED
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define Z7_COMPILER_SHA256_SUPPORTED
#endif
#endif
#endif
@@ -224,8 +224,6 @@ void Sha256_Init(CSha256 *p)
#endif
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
// static
extern MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64];
View File
@@ -1,5 +1,5 @@
/* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions
2023-04-02 : Igor Pavlov : Public domain */
2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Compiler.h"
@@ -11,6 +11,8 @@
#endif
#endif
// #define Z7_USE_HW_SHA_STUB // for debug
#ifdef MY_CPU_X86_OR_AMD64
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check
#define USE_HW_SHA
@@ -32,9 +34,14 @@
#endif
#if (_MSC_VER >= USE_VER_MIN)
#define USE_HW_SHA
#else
#define Z7_USE_HW_SHA_STUB
#endif
#endif
// #endif // MY_CPU_X86_OR_AMD64
#ifndef USE_HW_SHA
// #define Z7_USE_HW_SHA_STUB // for debug
#endif
#ifdef USE_HW_SHA
@@ -202,19 +209,28 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_
#endif // USE_HW_SHA
#elif defined(MY_CPU_ARM_OR_ARM64)
#if defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
#if defined(__ARM_FEATURE_SHA2) \
|| defined(__ARM_FEATURE_CRYPTO)
#define USE_HW_SHA
#else
#if defined(MY_CPU_ARM64) \
|| defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
|| defined(Z7_MSC_VER_ORIGINAL)
#if defined(__ARM_FP) && \
( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|| defined(__GNUC__) && (__GNUC__ >= 6) \
) \
|| defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
#if defined(MY_CPU_ARM64) \
|| !defined(Z7_CLANG_VERSION) \
|| defined(__ARM_NEON) && \
(Z7_CLANG_VERSION < 170000 || \
Z7_CLANG_VERSION > 170001)
#define USE_HW_SHA
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define USE_HW_SHA
#endif
#elif defined(_MSC_VER)
#if _MSC_VER >= 1910
#define USE_HW_SHA
#endif
#endif
@@ -222,23 +238,87 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_
// #pragma message("=== Sha256 HW === ")
#if defined(__clang__) || defined(__GNUC__)
#if !defined(__ARM_FEATURE_SHA2) && \
!defined(__ARM_FEATURE_CRYPTO)
#ifdef MY_CPU_ARM64
#define ATTRIB_SHA __attribute__((__target__("+crypto,sha2")))
#if defined(__clang__)
#define ATTRIB_SHA __attribute__((__target__("crypto")))
#else
#define ATTRIB_SHA __attribute__((__target__("+crypto")))
#endif
#else
#if defined(__clang__) && (__clang_major__ >= 1)
#define ATTRIB_SHA __attribute__((__target__("armv8-a,sha2")))
#else
#define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
#endif
#endif
#endif
#else
// _MSC_VER
// for arm32
#define _ARM_USE_NEW_NEON_INTRINSICS
#endif
#if defined(_MSC_VER) && !defined(__clang__) && defined(MY_CPU_ARM64)
#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#if defined(__clang__) && __clang_major__ < 16
#if !defined(__ARM_FEATURE_SHA2) && \
!defined(__ARM_FEATURE_CRYPTO)
// #pragma message("=== we set __ARM_FEATURE_CRYPTO 1 === ")
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define Z7_ARM_FEATURE_CRYPTO_WAS_SET 1
// #if defined(__clang__) && __clang_major__ < 13
#define __ARM_FEATURE_CRYPTO 1
// #else
#define __ARM_FEATURE_SHA2 1
// #endif
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif // clang
#if defined(__clang__)
#if defined(__ARM_ARCH) && __ARM_ARCH < 8
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
// #pragma message("#define __ARM_ARCH 8")
#undef __ARM_ARCH
#define __ARM_ARCH 8
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif // clang
#include <arm_neon.h>
#if defined(Z7_ARM_FEATURE_CRYPTO_WAS_SET) && \
defined(__ARM_FEATURE_CRYPTO) && \
defined(__ARM_FEATURE_SHA2)
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#undef __ARM_FEATURE_CRYPTO
#undef __ARM_FEATURE_SHA2
#undef Z7_ARM_FEATURE_CRYPTO_WAS_SET
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
// #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ")
#endif
#endif // Z7_MSC_VER_ORIGINAL
typedef uint32x4_t v128;
// typedef __n128 v128; // MSVC
@@ -316,10 +396,10 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_
LOAD_SHUFFLE (m2, 2)
LOAD_SHUFFLE (m3, 3)
R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 );
R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 )
R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN )
state0 = vaddq_u32(state0, state0_save);
state1 = vaddq_u32(state1, state1_save);
@@ -337,16 +417,17 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_
#endif // MY_CPU_ARM_OR_ARM64
#ifndef USE_HW_SHA
#if !defined(USE_HW_SHA) && defined(Z7_USE_HW_SHA_STUB)
// #error Stop_Compiling_UNSUPPORTED_SHA
// #include <stdlib.h>
// We can compile this file with another C compiler,
// or we can compile asm version.
// So we can generate real code instead of this stub function.
// #include "Sha256.h"
void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
// #if defined(_MSC_VER)
#pragma message("Sha256 HW-SW stub was used")
// #endif
void Z7_FASTCALL Sha256_UpdateBlocks (UInt32 state[8], const Byte *data, size_t numBlocks);
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{
@@ -359,7 +440,6 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_
return;
*/
}
#endif
@@ -384,3 +464,4 @@ void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_
#undef USE_HW_SHA
#undef ATTRIB_SHA
#undef USE_VER_MIN
#undef Z7_USE_HW_SHA_STUB
View File
@@ -1,5 +1,5 @@
/* SwapBytes.c -- Byte Swap conversion filter
2023-04-07 : Igor Pavlov : Public domain */
2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -305,11 +305,12 @@ ShufBytes_256(void *items8, const void *lim8, const void *mask128_ptr)
msvc 19.30+ (VS2022): replaces _mm256_set_m128i(m,m) to vbroadcastf128(m) as we want
*/
// _mm256_broadcastsi128_si256(*mask128_ptr);
/*
#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION < 80000)
#define MY_mm256_set_m128i(hi, lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1)
MY_mm256_set_m128i
*/
_mm256_set_m128i(
#else
#define MY_mm256_set_m128i _mm256_set_m128i
#endif
MY_mm256_set_m128i(
*(const __m128i *)mask128_ptr,
*(const __m128i *)mask128_ptr);
#endif
@@ -330,32 +331,59 @@ ShufBytes_256(void *items8, const void *lim8, const void *mask128_ptr)
// compile message "NEON intrinsics not available with the soft-float ABI"
#elif defined(MY_CPU_ARM_OR_ARM64) || \
(defined(__ARM_ARCH) && (__ARM_ARCH >= 7))
// #elif defined(MY_CPU_ARM64)
#elif defined(MY_CPU_ARM_OR_ARM64) \
&& defined(MY_CPU_LE) \
&& !defined(Z7_DISABLE_ARM_NEON)
#if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 8)
#if (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) \
|| defined(__GNUC__) && (__GNUC__ >= 6)
#if defined(__ARM_FP)
#if (defined(__ARM_ARCH) && (__ARM_ARCH >= 4)) \
|| defined(MY_CPU_ARM64)
#if defined(MY_CPU_ARM64) \
|| !defined(Z7_CLANG_VERSION) \
|| defined(__ARM_NEON)
#define USE_SWAP_128
#endif
#ifdef MY_CPU_ARM64
// #define SWAP_ATTRIB_NEON __attribute__((__target__("")))
#else
// #define SWAP_ATTRIB_NEON __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
#endif
#if defined(Z7_CLANG_VERSION)
// #define SWAP_ATTRIB_NEON __attribute__((__target__("neon")))
#else
// #pragma message("SWAP_ATTRIB_NEON __attribute__((__target__(fpu=neon))")
#define SWAP_ATTRIB_NEON __attribute__((__target__("fpu=neon")))
#endif
#endif // MY_CPU_ARM64
#endif // __ARM_NEON
#endif // __ARM_ARCH
#endif // __ARM_FP
#elif defined(_MSC_VER)
#if (_MSC_VER >= 1910)
#define USE_SWAP_128
#endif
#endif
#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
#ifdef USE_SWAP_128
#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
#include <arm64_neon.h>
#else
/*
#if !defined(__ARM_NEON)
#if defined(Z7_GCC_VERSION) && (__GNUC__ < 5) \
|| defined(Z7_GCC_VERSION) && (__GNUC__ == 5) && (Z7_GCC_VERSION < 90201) \
|| defined(Z7_GCC_VERSION) && (__GNUC__ == 5) && (Z7_GCC_VERSION < 100100)
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#pragma message("#define __ARM_NEON 1")
// #define __ARM_NEON 1
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif
#endif
*/
#include <arm_neon.h>
#endif
#endif
#ifndef USE_SWAP_128
#define FORCE_SWAP_MODE
@@ -464,6 +492,13 @@ Z7_ATTRIB_NO_VECTOR \
void Z7_FASTCALL
#if defined(MY_CPU_ARM_OR_ARM64)
#if defined(__clang__)
#pragma GCC diagnostic ignored "-Wlanguage-extension-token"
#endif
#endif
#ifdef MY_CPU_64BIT
#if defined(MY_CPU_ARM64) \
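
All of the SSE/AVX/NEON plumbing above only accelerates a very small scalar operation: reversing the byte order of each fixed-width item in a buffer. A portable scalar sketch of the 4-byte case, illustrative only and without the SDK's real entry points or alignment handling:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Reverse the byte order of each 4-byte item in place.
   The size is assumed here to be a multiple of 4. */
static void swap_bytes_4(uint8_t *items, size_t size)
{
  for (size_t i = 0; i + 4 <= size; i += 4)
  {
    uint8_t b0 = items[i], b1 = items[i + 1];
    items[i]     = items[i + 3];
    items[i + 1] = items[i + 2];
    items[i + 2] = b1;
    items[i + 3] = b0;
  }
}

int main(void)
{
  uint8_t buf[4] = { 0x11, 0x22, 0x33, 0x44 };
  swap_bytes_4(buf, sizeof(buf));
  printf("%02x %02x %02x %02x\n", buf[0], buf[1], buf[2], buf[3]);  /* 44 33 22 11 */
  return 0;
}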
View File
@@ -1,5 +1,5 @@
/* Threads.c -- multithreading library
2023-03-04 : Igor Pavlov : Public domain */
2024-03-28 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -195,20 +195,19 @@ WRes CriticalSection_Init(CCriticalSection *p)
// ---------- POSIX ----------
#ifndef __APPLE__
#if defined(__linux__) && !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__)
#ifndef Z7_AFFINITY_DISABLE
// _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET
// clang < 3.6 : unknown warning group '-Wreserved-id-macro'
// clang 3.6 - 12.01 : gives warning "macro name is a reserved identifier"
// clang >= 13 : do not give warning
#if !defined(_GNU_SOURCE)
#if defined(__clang__) && (__clang_major__ >= 4) && (__clang_major__ <= 12)
#pragma GCC diagnostic ignored "-Wreserved-id-macro"
#endif
#define _GNU_SOURCE
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
// #define _GNU_SOURCE
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif // !defined(_GNU_SOURCE)
#endif // Z7_AFFINITY_DISABLE
#endif // __APPLE__
#endif // __linux__
#include "Threads.h"
@@ -244,8 +243,9 @@ WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param,
{
if (cpuSet)
{
#ifdef Z7_AFFINITY_SUPPORTED
// pthread_attr_setaffinity_np() is not supported for MUSL compile.
// so we check for __GLIBC__ here
#if defined(Z7_AFFINITY_SUPPORTED) && defined( __GLIBC__)
/*
printf("\n affinity :");
unsigned i;
@@ -267,7 +267,7 @@ WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param,
// ret2 =
pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet);
// if (ret2) ret = ret2;
#endif
#endif
}
ret = pthread_create(&p->_tid, &attr, func, param);
@@ -369,13 +369,20 @@ WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p)
{ return AutoResetEvent_Create(p, 0); }
#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13)
// freebsd:
#pragma GCC diagnostic ignored "-Wthread-safety-analysis"
#endif
WRes Event_Set(CEvent *p)
{
RINOK(pthread_mutex_lock(&p->_mutex))
p->_state = True;
int res1 = pthread_cond_broadcast(&p->_cond);
int res2 = pthread_mutex_unlock(&p->_mutex);
return (res2 ? res2 : res1);
{
const int res1 = pthread_cond_broadcast(&p->_cond);
const int res2 = pthread_mutex_unlock(&p->_mutex);
return (res2 ? res2 : res1);
}
}
WRes Event_Reset(CEvent *p)
@@ -408,8 +415,8 @@ WRes Event_Close(CEvent *p)
return 0;
p->_created = 0;
{
int res1 = pthread_mutex_destroy(&p->_mutex);
int res2 = pthread_cond_destroy(&p->_cond);
const int res1 = pthread_mutex_destroy(&p->_mutex);
const int res2 = pthread_cond_destroy(&p->_cond);
return (res1 ? res1 : res2);
}
}
@@ -487,8 +494,8 @@ WRes Semaphore_Close(CSemaphore *p)
return 0;
p->_created = 0;
{
int res1 = pthread_mutex_destroy(&p->_mutex);
int res2 = pthread_cond_destroy(&p->_cond);
const int res1 = pthread_mutex_destroy(&p->_mutex);
const int res2 = pthread_cond_destroy(&p->_cond);
return (res1 ? res1 : res2);
}
}
@@ -549,6 +556,18 @@ LONG InterlockedIncrement(LONG volatile *addend)
#endif
}
LONG InterlockedDecrement(LONG volatile *addend)
{
// Print("InterlockedDecrement")
#ifdef USE_HACK_UNSAFE_ATOMIC
LONG val = *addend - 1;
*addend = val;
return val;
#else
return __sync_sub_and_fetch(addend, 1);
#endif
}
#endif // _WIN32
WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p)
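
The new InterlockedDecrement mirrors the existing InterlockedIncrement and, outside the deliberately unsafe hack path, leans on GCC's __sync builtins. A standalone sketch of the same pair, illustrative only:

#include <stdio.h>

typedef long LONG;

static LONG my_increment(volatile LONG *addend)
{
  return __sync_add_and_fetch(addend, 1);   /* atomic increment, returns new value */
}

static LONG my_decrement(volatile LONG *addend)
{
  return __sync_sub_and_fetch(addend, 1);   /* atomic decrement, returns new value */
}

int main(void)
{
  volatile LONG refs = 1;
  my_increment(&refs);                        /* refs == 2 */
  printf("%ld\n", (long)my_decrement(&refs)); /* prints 1 */
  return 0;
}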
View File
@@ -1,5 +1,5 @@
/* Xz.c - Xz
2023-04-02 : Igor Pavlov : Public domain */
2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -52,6 +52,7 @@ void XzCheck_Init(CXzCheck *p, unsigned mode)
case XZ_CHECK_CRC32: p->crc = CRC_INIT_VAL; break;
case XZ_CHECK_CRC64: p->crc64 = CRC64_INIT_VAL; break;
case XZ_CHECK_SHA256: Sha256_Init(&p->sha); break;
default: break;
}
}
@@ -62,6 +63,7 @@ void XzCheck_Update(CXzCheck *p, const void *data, size_t size)
case XZ_CHECK_CRC32: p->crc = CrcUpdate(p->crc, data, size); break;
case XZ_CHECK_CRC64: p->crc64 = Crc64Update(p->crc64, data, size); break;
case XZ_CHECK_SHA256: Sha256_Update(&p->sha, (const Byte *)data, size); break;
default: break;
}
}
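
The two hunks above only add default: break; to the switches over the check type, which covers the unhandled values explicitly and presumably keeps switch-related diagnostics quiet. A hypothetical reduction of the same pattern:

typedef enum { CHECK_NONE, CHECK_CRC32, CHECK_CRC64, CHECK_SHA256 } CheckMode;

/* The added "default: break;" covers CHECK_NONE and any out-of-range value. */
static int is_checksummed(CheckMode mode)
{
  switch (mode)
  {
    case CHECK_CRC32:
    case CHECK_CRC64:
    case CHECK_SHA256:
      return 1;
    default:
      break;
  }
  return 0;
}

int main(void)
{
  return is_checksummed(CHECK_NONE);
}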
View File
@@ -1,5 +1,5 @@
/* XzCrc64.c -- CRC64 calculation
2023-04-02 : Igor Pavlov : Public domain */
2023-12-08 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -8,36 +8,76 @@
#define kCrc64Poly UINT64_CONST(0xC96C5795D7870F42)
#ifdef MY_CPU_LE
#define CRC64_NUM_TABLES 4
#else
#define CRC64_NUM_TABLES 5
UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
// for debug only : define Z7_CRC64_DEBUG_BE to test big-endian code in little-endian cpu
// #define Z7_CRC64_DEBUG_BE
#ifdef Z7_CRC64_DEBUG_BE
#undef MY_CPU_LE
#define MY_CPU_BE
#endif
#ifdef Z7_CRC64_NUM_TABLES
#define Z7_CRC64_NUM_TABLES_USE Z7_CRC64_NUM_TABLES
#else
#define Z7_CRC64_NUM_TABLES_USE 12
#endif
#if Z7_CRC64_NUM_TABLES_USE < 1
#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
#endif
#if Z7_CRC64_NUM_TABLES_USE != 1
#ifndef MY_CPU_BE
UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
#define FUNC_NAME_LE_2(s) XzCrc64UpdateT ## s
#define FUNC_NAME_LE_1(s) FUNC_NAME_LE_2(s)
#define FUNC_NAME_LE FUNC_NAME_LE_1(Z7_CRC64_NUM_TABLES_USE)
UInt64 Z7_FASTCALL FUNC_NAME_LE (UInt64 v, const void *data, size_t size, const UInt64 *table);
#endif
#ifndef MY_CPU_LE
#define FUNC_NAME_BE_2(s) XzCrc64UpdateBeT ## s
#define FUNC_NAME_BE_1(s) FUNC_NAME_BE_2(s)
#define FUNC_NAME_BE FUNC_NAME_BE_1(Z7_CRC64_NUM_TABLES_USE)
UInt64 Z7_FASTCALL FUNC_NAME_BE (UInt64 v, const void *data, size_t size, const UInt64 *table);
#endif
typedef UInt64 (Z7_FASTCALL *CRC64_FUNC)(UInt64 v, const void *data, size_t size, const UInt64 *table);
#if defined(MY_CPU_LE)
#define FUNC_REF FUNC_NAME_LE
#elif defined(MY_CPU_BE)
#define FUNC_REF FUNC_NAME_BE
#else
#define FUNC_REF g_Crc64Update
static UInt64 (Z7_FASTCALL *FUNC_REF)(UInt64 v, const void *data, size_t size, const UInt64 *table);
#endif
#endif
MY_ALIGN(64)
static UInt64 g_Crc64Table[256 * Z7_CRC64_NUM_TABLES_USE];
static CRC64_FUNC g_Crc64Update;
UInt64 g_Crc64Table[256 * CRC64_NUM_TABLES];
UInt64 Z7_FASTCALL Crc64Update(UInt64 v, const void *data, size_t size)
{
return g_Crc64Update(v, data, size, g_Crc64Table);
#if Z7_CRC64_NUM_TABLES_USE == 1
#define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
const UInt64 *table = g_Crc64Table;
const Byte *p = (const Byte *)data;
const Byte *lim = p + size;
for (; p != lim; p++)
v = CRC64_UPDATE_BYTE_2(v, *p);
return v;
#undef CRC64_UPDATE_BYTE_2
#else
return FUNC_REF (v, data, size, g_Crc64Table);
#endif
}
UInt64 Z7_FASTCALL Crc64Calc(const void *data, size_t size)
{
return g_Crc64Update(CRC64_INIT_VAL, data, size, g_Crc64Table) ^ CRC64_INIT_VAL;
}
Z7_NO_INLINE
void Z7_FASTCALL Crc64GenerateTable(void)
{
UInt32 i;
unsigned i;
for (i = 0; i < 256; i++)
{
UInt64 r = i;
@@ -46,35 +86,55 @@ void Z7_FASTCALL Crc64GenerateTable(void)
r = (r >> 1) ^ (kCrc64Poly & ((UInt64)0 - (r & 1)));
g_Crc64Table[i] = r;
}
for (i = 256; i < 256 * CRC64_NUM_TABLES; i++)
#if Z7_CRC64_NUM_TABLES_USE != 1
#if 1 || 1 && defined(MY_CPU_X86) // low register count
for (i = 0; i < 256 * (Z7_CRC64_NUM_TABLES_USE - 1); i++)
{
const UInt64 r = g_Crc64Table[(size_t)i - 256];
g_Crc64Table[i] = g_Crc64Table[r & 0xFF] ^ (r >> 8);
const UInt64 r0 = g_Crc64Table[(size_t)i];
g_Crc64Table[(size_t)i + 256] = g_Crc64Table[(Byte)r0] ^ (r0 >> 8);
}
#ifdef MY_CPU_LE
g_Crc64Update = XzCrc64UpdateT4;
#else
#else
for (i = 0; i < 256 * (Z7_CRC64_NUM_TABLES_USE - 1); i += 2)
{
#ifndef MY_CPU_BE
UInt64 r0 = g_Crc64Table[(size_t)(i) ];
UInt64 r1 = g_Crc64Table[(size_t)(i) + 1];
r0 = g_Crc64Table[(Byte)r0] ^ (r0 >> 8);
r1 = g_Crc64Table[(Byte)r1] ^ (r1 >> 8);
g_Crc64Table[(size_t)i + 256 ] = r0;
g_Crc64Table[(size_t)i + 256 + 1] = r1;
}
#endif
#ifndef MY_CPU_LE
{
#ifndef MY_CPU_BE
UInt32 k = 1;
if (*(const Byte *)&k == 1)
g_Crc64Update = XzCrc64UpdateT4;
FUNC_REF = FUNC_NAME_LE;
else
#endif
#endif
{
for (i = 256 * CRC64_NUM_TABLES - 1; i >= 256; i--)
#ifndef MY_CPU_BE
FUNC_REF = FUNC_NAME_BE;
#endif
for (i = 0; i < 256 * Z7_CRC64_NUM_TABLES_USE; i++)
{
const UInt64 x = g_Crc64Table[(size_t)i - 256];
const UInt64 x = g_Crc64Table[i];
g_Crc64Table[i] = Z7_BSWAP64(x);
}
g_Crc64Update = XzCrc64UpdateT1_BeT4;
}
}
#endif
#endif // ndef MY_CPU_LE
#endif // Z7_CRC64_NUM_TABLES_USE != 1
}
#undef kCrc64Poly
#undef CRC64_NUM_TABLES
#undef Z7_CRC64_NUM_TABLES_USE
#undef FUNC_REF
#undef FUNC_NAME_LE_2
#undef FUNC_NAME_LE_1
#undef FUNC_NAME_LE
#undef FUNC_NAME_BE_2
#undef FUNC_NAME_BE_1
#undef FUNC_NAME_BE
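
Everything above builds slicing tables for the same byte-at-a-time recurrence that the Z7_CRC64_NUM_TABLES_USE == 1 branch of Crc64Update spells out. A self-contained reference version of that recurrence, using the polynomial from this file and assuming the all-ones init/final value from XzCrc64.h:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define CRC64_POLY  UINT64_C(0xC96C5795D7870F42)  /* kCrc64Poly, reflected form */
#define CRC64_INIT  UINT64_C(0xFFFFFFFFFFFFFFFF)  /* assumed CRC64_INIT_VAL */

static uint64_t table[256];

static void crc64_generate_table(void)
{
  for (unsigned i = 0; i < 256; i++)
  {
    uint64_t r = i;
    for (unsigned j = 0; j < 8; j++)
      r = (r >> 1) ^ (CRC64_POLY & (UINT64_C(0) - (r & 1)));
    table[i] = r;
  }
}

static uint64_t crc64(const void *data, size_t size)
{
  uint64_t v = CRC64_INIT;
  const uint8_t *p = (const uint8_t *)data;
  for (; size != 0; size--, p++)
    v = table[(v ^ *p) & 0xFF] ^ (v >> 8);    /* same as CRC64_UPDATE_BYTE_2 */
  return v ^ CRC64_INIT;
}

int main(void)
{
  crc64_generate_table();
  printf("%016llx\n", (unsigned long long)crc64("123456789", 9));
  return 0;
}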
View File
@@ -1,61 +1,261 @@
/* XzCrc64Opt.c -- CRC64 calculation
2023-04-02 : Igor Pavlov : Public domain */
/* XzCrc64Opt.c -- CRC64 calculation (optimized functions)
2023-12-08 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#if !defined(Z7_CRC64_NUM_TABLES) || Z7_CRC64_NUM_TABLES > 1
// for debug only : define Z7_CRC64_DEBUG_BE to test big-endian code in little-endian cpu
// #define Z7_CRC64_DEBUG_BE
#ifdef Z7_CRC64_DEBUG_BE
#undef MY_CPU_LE
#define MY_CPU_BE
#endif
#if defined(MY_CPU_64BIT)
#define Z7_CRC64_USE_64BIT
#endif
// the value Z7_CRC64_NUM_TABLES_USE must be defined to same value as in XzCrc64.c
#ifdef Z7_CRC64_NUM_TABLES
#define Z7_CRC64_NUM_TABLES_USE Z7_CRC64_NUM_TABLES
#else
#define Z7_CRC64_NUM_TABLES_USE 12
#endif
#if Z7_CRC64_NUM_TABLES_USE % 4 || \
Z7_CRC64_NUM_TABLES_USE < 4 || \
Z7_CRC64_NUM_TABLES_USE > 4 * 4
#error Stop_Compiling_Bad_CRC64_NUM_TABLES
#endif
#ifndef MY_CPU_BE
#define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
#define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table)
#if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0)
#define Q64LE(n, d) \
( (table + ((n) * 8 + 7) * 0x100)[((d) ) & 0xFF] \
^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 1 * 8) & 0xFF] \
^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 2 * 8) & 0xFF] \
^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 3 * 8) & 0xFF] \
^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 4 * 8) & 0xFF] \
^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 5 * 8) & 0xFF] \
^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 6 * 8) & 0xFF] \
^ (table + ((n) * 8 + 0) * 0x100)[((d) >> 7 * 8)] )
#define R64(a) *((const UInt64 *)(const void *)p + (a))
#else
#define Q32LE(n, d) \
( (table + ((n) * 4 + 3) * 0x100)[((d) ) & 0xFF] \
^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 1 * 8) & 0xFF] \
^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 2 * 8) & 0xFF] \
^ (table + ((n) * 4 + 0) * 0x100)[((d) >> 3 * 8)] )
#define R32(a) *((const UInt32 *)(const void *)p + (a))
#endif
#define CRC64_FUNC_PRE_LE2(step) \
UInt64 Z7_FASTCALL XzCrc64UpdateT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table)
#define CRC64_FUNC_PRE_LE(step) \
CRC64_FUNC_PRE_LE2(step); \
CRC64_FUNC_PRE_LE2(step)
CRC64_FUNC_PRE_LE(Z7_CRC64_NUM_TABLES_USE)
{
const Byte *p = (const Byte *)data;
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++)
const Byte *lim;
for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++)
v = CRC64_UPDATE_BYTE_2(v, *p);
for (; size >= 4; size -= 4, p += 4)
lim = p + size;
if (size >= Z7_CRC64_NUM_TABLES_USE)
{
const UInt32 d = (UInt32)v ^ *(const UInt32 *)(const void *)p;
v = (v >> 32)
^ (table + 0x300)[((d ) & 0xFF)]
^ (table + 0x200)[((d >> 8) & 0xFF)]
^ (table + 0x100)[((d >> 16) & 0xFF)]
^ (table + 0x000)[((d >> 24))];
lim -= Z7_CRC64_NUM_TABLES_USE;
do
{
#if Z7_CRC64_NUM_TABLES_USE == 4
const UInt32 d = (UInt32)v ^ R32(0);
v = (v >> 32) ^ Q32LE(0, d);
#elif Z7_CRC64_NUM_TABLES_USE == 8
#ifdef Z7_CRC64_USE_64BIT
v ^= R64(0);
v = Q64LE(0, v);
#else
UInt32 v0, v1;
v0 = (UInt32)v ^ R32(0);
v1 = (UInt32)(v >> 32) ^ R32(1);
v = Q32LE(1, v0) ^ Q32LE(0, v1);
#endif
#elif Z7_CRC64_NUM_TABLES_USE == 12
UInt32 w;
UInt32 v0, v1;
v0 = (UInt32)v ^ R32(0);
v1 = (UInt32)(v >> 32) ^ R32(1);
w = R32(2);
v = Q32LE(0, w);
v ^= Q32LE(2, v0) ^ Q32LE(1, v1);
#elif Z7_CRC64_NUM_TABLES_USE == 16
#ifdef Z7_CRC64_USE_64BIT
UInt64 w;
UInt64 x;
w = R64(1); x = Q64LE(0, w);
v ^= R64(0); v = x ^ Q64LE(1, v);
#else
UInt32 v0, v1;
UInt32 r0, r1;
v0 = (UInt32)v ^ R32(0);
v1 = (UInt32)(v >> 32) ^ R32(1);
r0 = R32(2);
r1 = R32(3);
v = Q32LE(1, r0) ^ Q32LE(0, r1);
v ^= Q32LE(3, v0) ^ Q32LE(2, v1);
#endif
#else
#error Stop_Compiling_Bad_CRC64_NUM_TABLES
#endif
p += Z7_CRC64_NUM_TABLES_USE;
}
while (p <= lim);
lim += Z7_CRC64_NUM_TABLES_USE;
}
for (; size > 0; size--, p++)
for (; p < lim; p++)
v = CRC64_UPDATE_BYTE_2(v, *p);
return v;
}
#undef CRC64_UPDATE_BYTE_2
#undef R32
#undef R64
#undef Q32LE
#undef Q64LE
#undef CRC64_FUNC_PRE_LE
#undef CRC64_FUNC_PRE_LE2
#endif
#ifndef MY_CPU_LE
#define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[(Byte)((crc) >> 56) ^ (b)] ^ ((crc) << 8))
#define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[((crc) >> 56) ^ (b)] ^ ((crc) << 8))
UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table)
#if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0)
#define Q64BE(n, d) \
( (table + ((n) * 8 + 0) * 0x100)[(Byte)(d)] \
^ (table + ((n) * 8 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \
^ (table + ((n) * 8 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \
^ (table + ((n) * 8 + 3) * 0x100)[((d) >> 3 * 8) & 0xFF] \
^ (table + ((n) * 8 + 4) * 0x100)[((d) >> 4 * 8) & 0xFF] \
^ (table + ((n) * 8 + 5) * 0x100)[((d) >> 5 * 8) & 0xFF] \
^ (table + ((n) * 8 + 6) * 0x100)[((d) >> 6 * 8) & 0xFF] \
^ (table + ((n) * 8 + 7) * 0x100)[((d) >> 7 * 8)] )
#ifdef Z7_CRC64_DEBUG_BE
#define R64BE(a) GetBe64a((const UInt64 *)(const void *)p + (a))
#else
#define R64BE(a) *((const UInt64 *)(const void *)p + (a))
#endif
#else
#define Q32BE(n, d) \
( (table + ((n) * 4 + 0) * 0x100)[(Byte)(d)] \
^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \
^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \
^ (table + ((n) * 4 + 3) * 0x100)[((d) >> 3 * 8)] )
#ifdef Z7_CRC64_DEBUG_BE
#define R32BE(a) GetBe32a((const UInt32 *)(const void *)p + (a))
#else
#define R32BE(a) *((const UInt32 *)(const void *)p + (a))
#endif
#endif
#define CRC64_FUNC_PRE_BE2(step) \
UInt64 Z7_FASTCALL XzCrc64UpdateBeT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table)
#define CRC64_FUNC_PRE_BE(step) \
CRC64_FUNC_PRE_BE2(step); \
CRC64_FUNC_PRE_BE2(step)
CRC64_FUNC_PRE_BE(Z7_CRC64_NUM_TABLES_USE)
{
const Byte *p = (const Byte *)data;
table += 0x100;
const Byte *lim;
v = Z7_BSWAP64(v);
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++)
for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++)
v = CRC64_UPDATE_BYTE_2_BE(v, *p);
for (; size >= 4; size -= 4, p += 4)
lim = p + size;
if (size >= Z7_CRC64_NUM_TABLES_USE)
{
const UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)(const void *)p;
v = (v << 32)
^ (table + 0x000)[((d ) & 0xFF)]
^ (table + 0x100)[((d >> 8) & 0xFF)]
^ (table + 0x200)[((d >> 16) & 0xFF)]
^ (table + 0x300)[((d >> 24))];
lim -= Z7_CRC64_NUM_TABLES_USE;
do
{
#if Z7_CRC64_NUM_TABLES_USE == 4
const UInt32 d = (UInt32)(v >> 32) ^ R32BE(0);
v = (v << 32) ^ Q32BE(0, d);
#elif Z7_CRC64_NUM_TABLES_USE == 12
const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
const UInt32 d0 = (UInt32)(v ) ^ R32BE(1);
const UInt32 w = R32BE(2);
v = Q32BE(0, w);
v ^= Q32BE(2, d1) ^ Q32BE(1, d0);
#elif Z7_CRC64_NUM_TABLES_USE == 8
#ifdef Z7_CRC64_USE_64BIT
v ^= R64BE(0);
v = Q64BE(0, v);
#else
const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
const UInt32 d0 = (UInt32)(v ) ^ R32BE(1);
v = Q32BE(1, d1) ^ Q32BE(0, d0);
#endif
#elif Z7_CRC64_NUM_TABLES_USE == 16
#ifdef Z7_CRC64_USE_64BIT
const UInt64 w = R64BE(1);
v ^= R64BE(0);
v = Q64BE(0, w) ^ Q64BE(1, v);
#else
const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
const UInt32 d0 = (UInt32)(v ) ^ R32BE(1);
const UInt32 w1 = R32BE(2);
const UInt32 w0 = R32BE(3);
v = Q32BE(1, w1) ^ Q32BE(0, w0);
v ^= Q32BE(3, d1) ^ Q32BE(2, d0);
#endif
#elif
#error Stop_Compiling_Bad_CRC64_NUM_TABLES
#endif
p += Z7_CRC64_NUM_TABLES_USE;
}
while (p <= lim);
lim += Z7_CRC64_NUM_TABLES_USE;
}
for (; size > 0; size--, p++)
for (; p < lim; p++)
v = CRC64_UPDATE_BYTE_2_BE(v, *p);
return Z7_BSWAP64(v);
}
#undef CRC64_UPDATE_BYTE_2_BE
#undef R32BE
#undef R64BE
#undef Q32BE
#undef Q64BE
#undef CRC64_FUNC_PRE_BE
#undef CRC64_FUNC_PRE_BE2
#endif
#undef Z7_CRC64_NUM_TABLES_USE
#endif
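
One detail of the new prologue worth spelling out: the mask 7 - (Z7_CRC64_NUM_TABLES_USE & 4) is 3 for the 4- and 12-byte variants (pure 32-bit loads) and 7 for the 8- and 16-byte variants (which may use 64-bit loads), so p is aligned to the widest load before the unrolled loop starts. A throwaway check of just that arithmetic:

#include <stdio.h>

int main(void)
{
    const int uses[] = { 4, 8, 12, 16 };
    for (int use : uses) {
        const int mask = 7 - (use & 4); /* same expression as in the new prologue */
        printf("Z7_CRC64_NUM_TABLES_USE=%2d -> mask 0x%x, align p to %d bytes\n",
               use, mask, mask + 1);
    }
    return 0;
}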


@@ -1,5 +1,5 @@
/* XzDec.c -- Xz Decode
2023-04-13 : Igor Pavlov : Public domain */
2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -105,30 +105,32 @@ static SRes XzBcFilterState_SetProps(void *pp, const Byte *props, size_t propSiz
{
if (propSize != 1)
return SZ_ERROR_UNSUPPORTED;
p->delta = (unsigned)props[0] + 1;
p->delta = (UInt32)props[0] + 1;
}
else
{
if (propSize == 4)
{
UInt32 v = GetUi32(props);
const UInt32 v = GetUi32(props);
switch (p->methodId)
{
case XZ_ID_PPC:
case XZ_ID_ARM:
case XZ_ID_SPARC:
case XZ_ID_ARM64:
if ((v & 3) != 0)
if (v & 3)
return SZ_ERROR_UNSUPPORTED;
break;
case XZ_ID_ARMT:
if ((v & 1) != 0)
case XZ_ID_RISCV:
if (v & 1)
return SZ_ERROR_UNSUPPORTED;
break;
case XZ_ID_IA64:
if ((v & 0xF) != 0)
if (v & 0xf)
return SZ_ERROR_UNSUPPORTED;
break;
default: break;
}
p->ip = v;
}
@@ -151,12 +153,13 @@ static void XzBcFilterState_Init(void *pp)
static const z7_Func_BranchConv g_Funcs_BranchConv_RISC_Dec[] =
{
Z7_BRANCH_CONV_DEC(PPC),
Z7_BRANCH_CONV_DEC(IA64),
Z7_BRANCH_CONV_DEC(ARM),
Z7_BRANCH_CONV_DEC(ARMT),
Z7_BRANCH_CONV_DEC(SPARC),
Z7_BRANCH_CONV_DEC(ARM64)
Z7_BRANCH_CONV_DEC_2 (BranchConv_PPC),
Z7_BRANCH_CONV_DEC_2 (BranchConv_IA64),
Z7_BRANCH_CONV_DEC_2 (BranchConv_ARM),
Z7_BRANCH_CONV_DEC_2 (BranchConv_ARMT),
Z7_BRANCH_CONV_DEC_2 (BranchConv_SPARC),
Z7_BRANCH_CONV_DEC_2 (BranchConv_ARM64),
Z7_BRANCH_CONV_DEC_2 (BranchConv_RISCV)
};
static SizeT XzBcFilterStateBase_Filter_Dec(CXzBcFilterStateBase *p, Byte *data, SizeT size)
@@ -262,7 +265,7 @@ static SRes XzBcFilterState_Code2(void *pp,
#define XZ_IS_SUPPORTED_FILTER_ID(id) \
((id) >= XZ_ID_Delta && (id) <= XZ_ID_ARM64)
((id) >= XZ_ID_Delta && (id) <= XZ_ID_RISCV)
SRes Xz_StateCoder_Bc_SetFromMethod_Func(IStateCoder *p, UInt64 id,
Xz_Func_BcFilterStateBase_Filter func, ISzAllocPtr alloc)
@@ -541,13 +544,12 @@ static SRes MixCoder_SetFromMethod(CMixCoder *p, unsigned coderIndex, UInt64 met
{
IStateCoder *sc = &p->coders[coderIndex];
p->ids[coderIndex] = methodId;
switch (methodId)
{
case XZ_ID_LZMA2: return Lzma2State_SetFromMethod(sc, outBuf, outBufSize, p->alloc);
#ifdef USE_SUBBLOCK
case XZ_ID_Subblock: return SbState_SetFromMethod(sc, p->alloc);
#endif
}
if (methodId == XZ_ID_LZMA2)
return Lzma2State_SetFromMethod(sc, outBuf, outBufSize, p->alloc);
#ifdef USE_SUBBLOCK
if (methodId == XZ_ID_Subblock)
return SbState_SetFromMethod(sc, p->alloc);
#endif
if (coderIndex == 0)
return SZ_ERROR_UNSUPPORTED;
return Xz_StateCoder_Bc_SetFromMethod_Func(sc, methodId,
@@ -558,10 +560,8 @@ static SRes MixCoder_SetFromMethod(CMixCoder *p, unsigned coderIndex, UInt64 met
static SRes MixCoder_ResetFromMethod(CMixCoder *p, unsigned coderIndex, UInt64 methodId, Byte *outBuf, size_t outBufSize)
{
IStateCoder *sc = &p->coders[coderIndex];
switch (methodId)
{
case XZ_ID_LZMA2: return Lzma2State_ResetOutBuf(sc, outBuf, outBufSize);
}
if (methodId == XZ_ID_LZMA2)
return Lzma2State_ResetOutBuf(sc, outBuf, outBufSize);
return SZ_ERROR_UNSUPPORTED;
}
@@ -804,7 +804,7 @@ static BoolInt Xz_CheckFooter(CXzStreamFlags flags, UInt64 indexSize, const Byte
}
#define READ_VARINT_AND_CHECK(buf, pos, size, res) \
{ unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \
{ const unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \
if (s == 0) return SZ_ERROR_ARCHIVE; \
pos += s; }
@@ -1034,7 +1034,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
SRes res;
ECoderFinishMode finishMode2 = finishMode;
BoolInt srcFinished2 = srcFinished;
BoolInt srcFinished2 = (BoolInt)srcFinished;
BoolInt destFinish = False;
if (p->block.packSize != (UInt64)(Int64)-1)
@@ -1127,7 +1127,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
return SZ_OK;
}
switch (p->state)
switch ((int)p->state)
{
case XZ_STATE_STREAM_HEADER:
{
@@ -1172,15 +1172,15 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
p->state = XZ_STATE_STREAM_INDEX;
break;
}
p->blockHeaderSize = ((UInt32)p->buf[0] << 2) + 4;
p->blockHeaderSize = ((unsigned)p->buf[0] << 2) + 4;
break;
}
if (p->pos != p->blockHeaderSize)
{
UInt32 cur = p->blockHeaderSize - p->pos;
unsigned cur = p->blockHeaderSize - p->pos;
if (cur > srcRem)
cur = (UInt32)srcRem;
cur = (unsigned)srcRem;
memcpy(p->buf + p->pos, src, cur);
p->pos += cur;
(*srcLen) += cur;
@@ -1222,8 +1222,8 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
}
else
{
UInt32 checkSize = XzFlags_GetCheckSize(p->streamFlags);
UInt32 cur = checkSize - p->pos;
const unsigned checkSize = XzFlags_GetCheckSize(p->streamFlags);
unsigned cur = checkSize - p->pos;
if (cur != 0)
{
if (srcRem == 0)
@@ -1232,7 +1232,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
return SZ_OK;
}
if (cur > srcRem)
cur = (UInt32)srcRem;
cur = (unsigned)srcRem;
memcpy(p->buf + p->pos, src, cur);
p->pos += cur;
(*srcLen) += cur;
@@ -1321,9 +1321,9 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
case XZ_STATE_STREAM_FOOTER:
{
UInt32 cur = XZ_STREAM_FOOTER_SIZE - p->pos;
unsigned cur = XZ_STREAM_FOOTER_SIZE - p->pos;
if (cur > srcRem)
cur = (UInt32)srcRem;
cur = (unsigned)srcRem;
memcpy(p->buf + p->pos, src, cur);
p->pos += cur;
(*srcLen) += cur;
@@ -1358,6 +1358,8 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
}
case XZ_STATE_BLOCK: break; /* to disable GCC warning */
default: return SZ_ERROR_FAIL;
}
}
/*
@@ -1773,10 +1775,10 @@ static void XzDecMt_Callback_Parse(void *obj, unsigned coderIndex, CMtDecCallbac
}
}
{
UInt64 packSize = block->packSize;
UInt64 packSizeAligned = packSize + ((0 - (unsigned)packSize) & 3);
UInt32 checkSize = XzFlags_GetCheckSize(coder->dec.streamFlags);
UInt64 blockPackSum = coder->inPreSize + packSizeAligned + checkSize;
const UInt64 packSize = block->packSize;
const UInt64 packSizeAligned = packSize + ((0 - (unsigned)packSize) & 3);
const unsigned checkSize = XzFlags_GetCheckSize(coder->dec.streamFlags);
const UInt64 blockPackSum = coder->inPreSize + packSizeAligned + checkSize;
// if (blockPackSum <= me->props.inBlockMax)
// unpackBlockMaxSize
{
@@ -2381,7 +2383,7 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
if (tMode)
{
XzDecMt_FreeOutBufs(p);
tMode = MtDec_PrepareRead(&p->mtc);
tMode = (BoolInt)MtDec_PrepareRead(&p->mtc);
}
#endif
@@ -2644,7 +2646,7 @@ SRes XzDecMt_Decode(CXzDecMtHandle p,
p->outSize = *outDataSize;
}
p->finishMode = finishMode;
p->finishMode = (BoolInt)finishMode;
// p->outSize = 457; p->outSize_Defined = True; p->finishMode = False; // for test
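
The READ_VARINT_AND_CHECK macro above relies on Xz_ReadVarInt, which parses the .xz multibyte integer format: seven payload bits per byte, least-significant group first, with bit 7 set on every byte except the last. A simplified decoder as a sketch of the format only (read_varint is an illustrative name, and the spec's over-long-encoding checks are skipped):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

static size_t read_varint(const uint8_t *buf, size_t size, uint64_t *value)
{
    *value = 0;
    for (size_t i = 0; i < size && i < 9; i++) {
        *value |= (uint64_t)(buf[i] & 0x7F) << (7 * i);
        if ((buf[i] & 0x80) == 0)
            return i + 1;          /* bytes consumed */
    }
    return 0;                      /* truncated or too long: the callers map this to SZ_ERROR_ARCHIVE */
}

int main(void)
{
    const uint8_t enc[] = { 0xE5, 0x8E, 0x26 };   /* 0x98765 encoded low 7 bits first */
    uint64_t v;
    const size_t used = read_varint(enc, sizeof enc, &v);
    printf("used=%u value=0x%llx\n", (unsigned)used, (unsigned long long)v);
    return (used == 3 && v == 0x98765) ? 0 : 1;
}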


@@ -1,5 +1,5 @@
/* XzEnc.c -- Xz Encode
2023-04-13 : Igor Pavlov : Public domain */
2024-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -29,8 +29,9 @@
#define XZ_GET_PAD_SIZE(dataSize) ((4 - ((unsigned)(dataSize) & 3)) & 3)
/* max pack size for LZMA2 block + check-64bytrs: */
#define XZ_GET_MAX_BLOCK_PACK_SIZE(unpackSize) ((unpackSize) + ((unpackSize) >> 10) + 16 + 64)
#define XZ_CHECK_SIZE_MAX 64
/* max pack size for LZMA2 block + pad4 + check_size: */
#define XZ_GET_MAX_BLOCK_PACK_SIZE(unpackSize) ((unpackSize) + ((unpackSize) >> 10) + 16 + XZ_CHECK_SIZE_MAX)
#define XZ_GET_ESTIMATED_BLOCK_TOTAL_PACK_SIZE(unpackSize) (XZ_BLOCK_HEADER_SIZE_MAX + XZ_GET_MAX_BLOCK_PACK_SIZE(unpackSize))
@@ -325,12 +326,13 @@ typedef struct
static const z7_Func_BranchConv g_Funcs_BranchConv_RISC_Enc[] =
{
Z7_BRANCH_CONV_ENC(PPC),
Z7_BRANCH_CONV_ENC(IA64),
Z7_BRANCH_CONV_ENC(ARM),
Z7_BRANCH_CONV_ENC(ARMT),
Z7_BRANCH_CONV_ENC(SPARC),
Z7_BRANCH_CONV_ENC(ARM64)
Z7_BRANCH_CONV_ENC_2 (BranchConv_PPC),
Z7_BRANCH_CONV_ENC_2 (BranchConv_IA64),
Z7_BRANCH_CONV_ENC_2 (BranchConv_ARM),
Z7_BRANCH_CONV_ENC_2 (BranchConv_ARMT),
Z7_BRANCH_CONV_ENC_2 (BranchConv_SPARC),
Z7_BRANCH_CONV_ENC_2 (BranchConv_ARM64),
Z7_BRANCH_CONV_ENC_2 (BranchConv_RISCV)
};
static SizeT XzBcFilterStateBase_Filter_Enc(CXzBcFilterStateBase *p, Byte *data, SizeT size)
@@ -888,9 +890,9 @@ static SRes Xz_CompressBlock(
blockSizes->unpackSize = checkInStream.processed;
}
{
Byte buf[4 + 64];
unsigned padSize = XZ_GET_PAD_SIZE(seqSizeOutStream.processed);
UInt64 packSize = seqSizeOutStream.processed;
Byte buf[4 + XZ_CHECK_SIZE_MAX];
const unsigned padSize = XZ_GET_PAD_SIZE(seqSizeOutStream.processed);
const UInt64 packSize = seqSizeOutStream.processed;
buf[0] = 0;
buf[1] = 0;
@@ -898,7 +900,8 @@ static SRes Xz_CompressBlock(
buf[3] = 0;
SeqCheckInStream_GetDigest(&checkInStream, buf + 4);
RINOK(WriteBytes(&seqSizeOutStream.vt, buf + (4 - padSize), padSize + XzFlags_GetCheckSize((CXzStreamFlags)props->checkId)))
RINOK(WriteBytes(&seqSizeOutStream.vt, buf + (4 - padSize),
padSize + XzFlags_GetCheckSize((CXzStreamFlags)props->checkId)))
blockSizes->totalSize = seqSizeOutStream.processed - padSize;
@@ -1083,18 +1086,19 @@ static SRes XzEnc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned outBuf
CXzEnc *me = (CXzEnc *)pp;
SRes res;
CMtProgressThunk progressThunk;
Byte *dest = me->outBufs[outBufIndex];
Byte *dest;
UNUSED_VAR(finished)
{
CXzEncBlockInfo *bInfo = &me->EncBlocks[outBufIndex];
bInfo->totalSize = 0;
bInfo->unpackSize = 0;
bInfo->headerSize = 0;
// v23.02: we don't compress empty blocks
// also we must ignore that empty block in XzEnc_MtCallback_Write()
if (srcSize == 0)
return SZ_OK;
}
dest = me->outBufs[outBufIndex];
if (!dest)
{
dest = (Byte *)ISzAlloc_Alloc(me->alloc, me->outBufSize);
@@ -1140,18 +1144,20 @@ static SRes XzEnc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned outBuf
static SRes XzEnc_MtCallback_Write(void *pp, unsigned outBufIndex)
{
CXzEnc *me = (CXzEnc *)pp;
const CXzEncBlockInfo *bInfo = &me->EncBlocks[outBufIndex];
const Byte *data = me->outBufs[outBufIndex];
RINOK(WriteBytes(me->outStream, data, bInfo->headerSize))
// v23.02: we don't write empty blocks
// note: if (bInfo->unpackSize == 0) then there is no compressed data of block
if (bInfo->unpackSize == 0)
return SZ_OK;
{
UInt64 totalPackFull = bInfo->totalSize + XZ_GET_PAD_SIZE(bInfo->totalSize);
RINOK(WriteBytes(me->outStream, data + XZ_BLOCK_HEADER_SIZE_MAX, (size_t)totalPackFull - bInfo->headerSize))
const Byte *data = me->outBufs[outBufIndex];
RINOK(WriteBytes(me->outStream, data, bInfo->headerSize))
{
const UInt64 totalPackFull = bInfo->totalSize + XZ_GET_PAD_SIZE(bInfo->totalSize);
RINOK(WriteBytes(me->outStream, data + XZ_BLOCK_HEADER_SIZE_MAX, (size_t)totalPackFull - bInfo->headerSize))
}
return XzEncIndex_AddIndexRecord(&me->xzIndex, bInfo->unpackSize, bInfo->totalSize, me->alloc);
}
return XzEncIndex_AddIndexRecord(&me->xzIndex, bInfo->unpackSize, bInfo->totalSize, me->alloc);
}
#endif
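
For reference, XZ_GET_PAD_SIZE as defined above computes the zero padding needed to reach the next 4-byte boundary, with 0 when the size is already aligned; a quick check using the macro exactly as it appears in this file:

#include <stdio.h>

#define XZ_GET_PAD_SIZE(dataSize) ((4 - ((unsigned)(dataSize) & 3)) & 3) /* as in XzEnc.c */

int main(void)
{
    for (unsigned n = 0; n < 8; n++)
        printf("size %u -> pad %u (total %u)\n", n, XZ_GET_PAD_SIZE(n), n + XZ_GET_PAD_SIZE(n));
    return 0;
}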


@@ -1,5 +1,5 @@
/* XzIn.c - Xz input
2023-04-02 : Igor Pavlov : Public domain */
2023-09-07 : Igor Pavlov : Public domain */
#include "Precomp.h"
@@ -27,7 +27,7 @@ SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream)
}
#define READ_VARINT_AND_CHECK(buf, pos, size, res) \
{ unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \
{ const unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \
if (s == 0) return SZ_ERROR_ARCHIVE; \
pos += s; }
@@ -37,7 +37,7 @@ SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex,
unsigned headerSize;
*headerSizeRes = 0;
RINOK(SeqInStream_ReadByte(inStream, &header[0]))
headerSize = (unsigned)header[0];
headerSize = header[0];
if (headerSize == 0)
{
*headerSizeRes = 1;
@@ -47,7 +47,7 @@ SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex,
*isIndex = False;
headerSize = (headerSize << 2) + 4;
*headerSizeRes = headerSize;
*headerSizeRes = (UInt32)headerSize;
{
size_t processedSize = headerSize - 1;
RINOK(SeqInStream_ReadMax(inStream, header + 1, &processedSize))
@@ -58,7 +58,7 @@ SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex,
}
#define ADD_SIZE_CHECK(size, val) \
{ UInt64 newSize = size + (val); if (newSize < size) return XZ_SIZE_OVERFLOW; size = newSize; }
{ const UInt64 newSize = size + (val); if (newSize < size) return XZ_SIZE_OVERFLOW; size = newSize; }
UInt64 Xz_GetUnpackSize(const CXzStream *p)
{

View File

@@ -155,7 +155,7 @@ namespace Xbyak {
enum {
DEFAULT_MAX_CODE_SIZE = 4096,
VERSION = 0x7060 /* 0xABCD = A.BC(.D) */
VERSION = 0x7210 /* 0xABCD = A.BC(.D) */
};
#ifndef MIE_INTEGER_TYPE_DEFINED
@@ -232,6 +232,7 @@ enum {
ERR_CANT_USE_REX2,
ERR_INVALID_DFV,
ERR_INVALID_REG_IDX,
ERR_BAD_ENCODING_MODE,
ERR_INTERNAL // Put it at last.
};
@@ -290,6 +291,7 @@ inline const char *ConvertErrorToString(int err)
"can't use rex2",
"invalid dfv",
"invalid reg index",
"bad encoding mode",
"internal error"
};
assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
@@ -1673,7 +1675,9 @@ inline const uint8_t* Label::getAddress() const
typedef enum {
DefaultEncoding,
VexEncoding,
EvexEncoding
EvexEncoding,
PreAVX10v2Encoding,
AVX10v2Encoding
} PreferredEncoding;
class CodeGenerator : public CodeArray {
@@ -1730,10 +1734,10 @@ private:
{
return op1.isREG(i32e) && ((op2.isREG(i32e) && op1.getBit() == op2.getBit()) || op2.isMEM());
}
static inline bool isValidSSE(const Operand& op1)
static inline bool isValidSSE(const Operand& op)
{
// SSE instructions do not support XMM16 - XMM31
return !(op1.isXMM() && op1.getIdx() >= 16);
return !(op.isXMM() && op.getIdx() >= 16);
}
static inline uint8_t rexRXB(int bit, int bit3, const Reg& r, const Reg& b, const Reg& x = Reg())
{
@@ -1867,16 +1871,19 @@ private:
}
db(code);
}
void verifySAE(const Reg& r, uint64_t type) const
// Allow YMM embedded rounding for AVX10.2 to minimize flag modifications
bool verifySAE(const Reg& r, const Reg& b, uint64_t type) const
{
if (((type & T_SAE_X) && r.isXMM()) || ((type & T_SAE_Y) && r.isYMM()) || ((type & T_SAE_Z) && r.isZMM())) return;
XBYAK_THROW(ERR_SAE_IS_INVALID)
if (((type & T_SAE_X) && (r.isYMM() && b.isXMM())) || ((type & T_SAE_Y) && b.isXMM()) || ((type & T_SAE_Z) && b.isYMM())) return true;
if (((type & T_SAE_X) && b.isXMM()) || ((type & T_SAE_Y) && b.isYMM()) || ((type & T_SAE_Z) && b.isZMM())) return false;
XBYAK_THROW_RET(ERR_SAE_IS_INVALID, false)
}
void verifyER(const Reg& r, uint64_t type) const
bool verifyER(const Reg& r, const Reg& b, uint64_t type) const
{
if ((type & T_ER_R) && r.isREG(32|64)) return;
if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return;
XBYAK_THROW(ERR_ER_IS_INVALID)
if ((type & T_ER_R) && b.isREG(32|64)) return false;
if (((type & T_ER_X) && (r.isYMM() && b.isXMM())) || ((type & T_ER_Y) && b.isXMM()) || ((type & T_ER_Z) && b.isYMM())) return true;
if (((type & T_ER_X) && b.isXMM()) || ((type & T_ER_Y) && b.isYMM()) || ((type & T_ER_Z) && b.isZMM())) return false;
XBYAK_THROW_RET(ERR_SAE_IS_INVALID, false)
}
// (a, b, c) contains non zero two or three values then err
int verifyDuplicate(int a, int b, int c, int err)
@@ -1897,19 +1904,21 @@ private:
bool R = reg.isExtIdx();
bool X3 = (x && x->isExtIdx()) || (base.isSIMD() && base.isExtIdx2());
bool B4 = base.isREG() && base.isExtIdx2();
bool X4 = x && (x->isREG() && x->isExtIdx2());
uint8_t B4 = (base.isREG() && base.isExtIdx2()) ? 8 : 0;
uint8_t U = (x && (x->isREG() && x->isExtIdx2())) ? 0 : 4;
bool B = base.isExtIdx();
bool Rp = reg.isExtIdx2();
int LL;
int rounding = verifyDuplicate(reg.getRounding(), base.getRounding(), v ? v->getRounding() : 0, ERR_ROUNDING_IS_ALREADY_SET);
int disp8N = 1;
if (rounding) {
bool isUzero = false;
if (rounding == EvexModifierRounding::T_SAE) {
verifySAE(base, type); LL = 0;
isUzero = verifySAE(reg, base, type); LL = 0;
} else {
verifyER(base, type); LL = rounding - 1;
isUzero = verifyER(reg, base, type); LL = rounding - 1;
}
if (isUzero) U = 0; // avx10.2 Evex.U
b = true;
} else {
if (v) VL = (std::max)(VL, v->getBit());
@@ -1935,8 +1944,8 @@ private:
if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
if (aaa == 0) z = 0; // clear T_z if mask is not set
db(0x62);
db((R ? 0 : 0x80) | (X3 ? 0 : 0x40) | (B ? 0 : 0x20) | (Rp ? 0 : 0x10) | (B4 ? 8 : 0) | mmm);
db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | (X4 ? 0 : 4) | (pp & 3));
db((R ? 0 : 0x80) | (X3 ? 0 : 0x40) | (B ? 0 : 0x20) | (Rp ? 0 : 0x10) | B4 | mmm);
db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | U | (pp & 3));
db((z ? 0x80 : 0) | ((LL & 3) << 5) | (b ? 0x10 : 0) | (V4 ? 0 : 8) | (aaa & 7));
db(code);
return disp8N;
@@ -2163,7 +2172,7 @@ private:
}
}
}
void opSSE(const Reg& r, const Operand& op, uint64_t type, int code, bool isValid(const Operand&, const Operand&), int imm8 = NONE)
void opSSE(const Reg& r, const Operand& op, uint64_t type, int code, bool isValid(const Operand&, const Operand&) = 0, int imm8 = NONE)
{
if (isValid && !isValid(r, op)) XBYAK_THROW(ERR_BAD_COMBINATION)
if (!isValidSSE(r) || !isValidSSE(op)) XBYAK_THROW(ERR_NOT_SUPPORTED)
@@ -2554,6 +2563,18 @@ private:
Operand::Kind kind = op.isBit(128) ? Operand::XMM : op.isBit(256) ? Operand::YMM : Operand::ZMM;
opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
}
// (x, x, x/m), (x, y, y/m), (y, z, z/m)
void opCvt6(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code)
{
int b1 = x1.getBit();
int b2 = x2.getBit();
int b3 = op.getBit();
if ((b1 == 128 && (b2 == 128 || b2 == 256) && (b2 == b3 || op.isMEM())) || (b1 == 256 && b2 == 512 && (b3 == b2 || op.isMEM()))) {
opVex(x1, &x2, op, type, code);
return;
}
XBYAK_THROW(ERR_BAD_COMBINATION);
}
const Xmm& cvtIdx0(const Operand& x) const
{
return x.isZMM() ? zm0 : x.isYMM() ? ym0 : xm0;
@@ -2644,21 +2665,24 @@ private:
if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
opVex(x, 0, addr, type, code);
}
void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding encoding)
void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding enc, int imm = NONE, uint64_t typeVex = 0, uint64_t typeEvex = 0, int sel = 0)
{
opAVX_X_X_XM(x1, x2, op, type | orEvexIf(encoding), code);
opAVX_X_X_XM(x1, x2, op, type | orEvexIf(enc, typeVex, typeEvex, sel), code, imm);
}
int orEvexIf(PreferredEncoding encoding) {
if (encoding == DefaultEncoding) {
encoding = defaultEncoding_;
PreferredEncoding getEncoding(PreferredEncoding enc, int sel) const
{
if (enc == DefaultEncoding) {
enc = defaultEncoding_[sel];
}
if (encoding == EvexEncoding) {
if ((sel == 0 && enc != VexEncoding && enc != EvexEncoding) || (sel == 1 && enc != PreAVX10v2Encoding && enc != AVX10v2Encoding)) XBYAK_THROW_RET(ERR_BAD_ENCODING_MODE, VexEncoding)
#ifdef XBYAK_DISABLE_AVX512
XBYAK_THROW(ERR_EVEX_IS_INVALID)
if (enc == EvexEncoding || enc == AVX10v2Encoding) XBYAK_THROW(ERR_EVEX_IS_INVALID)
#endif
return T_MUST_EVEX;
}
return 0;
return enc;
}
uint64_t orEvexIf(PreferredEncoding enc, uint64_t typeVex, uint64_t typeEvex, int sel) {
enc = getEncoding(enc, sel);
return ((sel == 0 && enc == VexEncoding) || (sel == 1 && enc != AVX10v2Encoding)) ? typeVex : (T_MUST_EVEX | typeEvex);
}
void opInOut(const Reg& a, const Reg& d, uint8_t code)
{
@@ -2770,6 +2794,31 @@ private:
}
opSSE(x, op, type1, code1, isXMM_XMMorMEM, imm);
}
// AVX10 zero-extending for vmovd, vmovw
void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding enc, int bit)
{
const Operand *p1 = &op1;
const Operand *p2 = &op2;
bool rev = false;
if (p1->isMEM()) {
std::swap(p1, p2);
rev = true;
}
if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION)
if (p1->isXMM()) {
std::swap(p1, p2);
rev = !rev;
}
enc = getEncoding(enc, 1);
int sel = -1;
if (p1->isXMM() || (p1->isMEM() && enc == AVX10v2Encoding)) {
sel = 2 + int(rev);
} else if (p1->isREG(bit) || p1->isMEM()) {
sel = int(rev);
}
if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION)
opAVX_X_X_XM(*static_cast<const Xmm*>(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]);
}
public:
unsigned int getVersion() const { return VERSION; }
using CodeArray::db;
@@ -2828,7 +2877,7 @@ public:
#endif
private:
bool isDefaultJmpNEAR_;
PreferredEncoding defaultEncoding_;
PreferredEncoding defaultEncoding_[2]; // 0:vnni, 1:vmpsadbw
public:
void L(const std::string& label) { labelMgr_.defineSlabel(label); }
void L(Label& label) { labelMgr_.defineClabel(label); }
@@ -2999,6 +3048,7 @@ public:
rex(*p2, *p1); db(0x90 | (p2->getIdx() & 7));
return;
}
if (p1->isREG() && p2->isREG()) std::swap(p1, p2); // adapt to NASM 2.16.03 behavior to pass tests
opRO(static_cast<const Reg&>(*p1), *p2, 0, 0x86 | (p1->isBit(8) ? 0 : 1), (p1->isREG() && (p1->getBit() == p2->getBit())));
}
@@ -3113,8 +3163,9 @@ public:
, es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs)
#endif
, isDefaultJmpNEAR_(false)
, defaultEncoding_(EvexEncoding)
{
setDefaultEncoding();
setDefaultEncodingAVX10();
labelMgr_.set(this);
}
void reset()
@@ -3151,13 +3202,20 @@ public:
#undef jnl
#endif
// set default encoding to select Vex or Evex
void setDefaultEncoding(PreferredEncoding encoding) { defaultEncoding_ = encoding; }
void sha1msg12(const Xmm& x, const Operand& op)
// set default encoding of VNNI
// EvexEncoding : AVX512_VNNI, VexEncoding : AVX-VNNI
void setDefaultEncoding(PreferredEncoding enc = EvexEncoding)
{
opROO(Reg(), op, x, T_MUST_EVEX, 0xD9);
if (enc != VexEncoding && enc != EvexEncoding) XBYAK_THROW(ERR_BAD_ENCODING_MODE)
defaultEncoding_[0] = enc;
}
// default : PreferredEncoding : AVX-VNNI-INT8/AVX512-FP16
void setDefaultEncodingAVX10(PreferredEncoding enc = PreAVX10v2Encoding)
{
if (enc != PreAVX10v2Encoding && enc != AVX10v2Encoding) XBYAK_THROW(ERR_BAD_ENCODING_MODE)
defaultEncoding_[1] = enc;
}
void bswap(const Reg32e& r)
{
int idx = r.getIdx();
@@ -3170,6 +3228,24 @@ public:
}
db(0xC8 + (idx & 7));
}
void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding)
{
const uint64_t typeTbl[] = {
T_EVEX|T_66|T_0F|T_W0|T_N4, T_EVEX|T_66|T_0F|T_W0|T_N4, // legacy, avx, avx512
T_MUST_EVEX|T_66|T_0F|T_EW0|T_N4, T_MUST_EVEX|T_F3|T_0F|T_EW0|T_N4, // avx10.2
};
const int codeTbl[] = { 0x7E, 0x6E, 0xD6, 0x7E };
opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, enc, 32);
}
void vmovw(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding)
{
const uint64_t typeTbl[] = {
T_MUST_EVEX|T_66|T_MAP5|T_N2, T_MUST_EVEX|T_66|T_MAP5|T_N2, // avx512-fp16
T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, T_MUST_EVEX|T_F3|T_MAP5|T_EW0|T_N2, // avx10.2
};
const int codeTbl[] = { 0x7E, 0x6E, 0x7E, 0x6E };
opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, enc, 16|32|64);
}
/*
use single byte nop if useMultiByteNop = false
*/
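
The encoding hooks added above split the old single default into two slots (defaultEncoding_[0] for the VNNI family, defaultEncoding_[1] for the AVX10.2 forms) and add per-call overrides such as the new vmovd overload. A hedged usage sketch, assuming an x86-64 build with the xbyak header on the include path; the Demo class, register, and operand choices are illustrative only:

#include <xbyak/xbyak.h>

struct Demo : Xbyak::CodeGenerator {
    Demo()
    {
        setDefaultEncoding(Xbyak::VexEncoding);              // VNNI family: prefer the AVX-VNNI (VEX) forms
        setDefaultEncodingAVX10(Xbyak::PreAVX10v2Encoding);  // keep pre-AVX10.2 forms by default
        vmovd(xmm2, ptr[rax]);                               // encoded per the defaults above
        vmovd(xmm2, ptr[rax], Xbyak::AVX10v2Encoding);       // per-call request for the AVX10.2 form
        ret();
    }
};

int main()
{
    Demo d;                        // only generates bytes; nothing is executed here
    return d.getSize() ? 0 : 1;
}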


@@ -1,4 +1,4 @@
const char *getVersionString() const { return "7.06"; }
const char *getVersionString() const { return "7.21"; }
void aadd(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38, 0x0FC, T_APX); }
void aand(const Address& addr, const Reg32e &reg) { opMR(addr, reg, T_0F38|T_66, 0x0FC, T_APX|T_66); }
void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); }
@@ -645,7 +645,7 @@ void jz(const char *label, LabelType type = T_AUTO) { jz(std::string(label), typ
void jz(const void *addr) { opJmpAbs(addr, T_NEAR, 0x74, 0x84, 0x0F); }//-V524
void jz(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }//-V524
void lahf() { db(0x9F); }
void lddqu(const Xmm& xmm, const Address& addr) { opMR(addr, xmm, T_F2 | T_0F, 0xF0); }
void lddqu(const Xmm& xmm, const Address& addr) { opSSE(xmm, addr, T_F2 | T_0F, 0xF0); }
void ldmxcsr(const Address& addr) { opMR(addr, Reg32(2), T_0F, 0xAE); }
void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opMR(addr, reg, 0, 0x8D); }
void leave() { db(0xC9); }
@@ -667,8 +667,8 @@ void loopne(const char *label) { loopne(std::string(label)); }
void loopne(std::string label) { opJmp(label, T_SHORT, 0xE0, 0, 0); }
void lss(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, T_0F, 0xB2); }
void lzcnt(const Reg&reg, const Operand& op) { if (opROO(Reg(), op, reg, T_APX|T_NF, 0xF5)) return; opCnt(reg, op, 0xBD); }
void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { opRR(reg1, reg2, T_66|T_0F, 0xF7); }
void maskmovq(const Mmx& reg1, const Mmx& reg2) { if (!reg1.isMMX() || !reg2.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opRR(reg1, reg2, T_0F, 0xF7); }
void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { opSSE(reg1, reg2, T_66|T_0F, 0xF7); }
void maskmovq(const Mmx& reg1, const Mmx& reg2) { opSSE(reg1, reg2, T_0F, 0xF7); }
void maxpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x5F, isXMM_XMMorMEM); }
void maxps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x5F, isXMM_XMMorMEM); }
void maxsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F2, 0x5F, isXMM_XMMorMEM); }
@@ -680,54 +680,52 @@ void minsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F2, 0x5D
void minss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x5D, isXMM_XMMorMEM); }
void monitor() { db(0x0F); db(0x01); db(0xC8); }
void monitorx() { db(0x0F); db(0x01); db(0xFA); }
void movapd(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_66, 0x29); }
void movapd(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_66, 0x29); }
void movapd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, T_0F, T_66); }
void movaps(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_NONE, 0x29); }
void movaps(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_NONE, 0x29); }
void movaps(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, T_0F, T_NONE); }
void movbe(const Address& addr, const Reg& reg) { opMR(addr, reg, T_0F38, 0xF1, T_APX, 0x61); }
void movbe(const Reg& reg, const Address& addr) { opMR(addr, reg, T_0F38, 0xF0, T_APX, 0x60); }
void movd(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, 0x7E); }
void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, 0x6E); }
void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x6E); }
void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x7E); }
void movd(const Mmx& mmx, const Operand& op) { if (!(op.isMEM() || op.isREG(32))) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0x66); opSSE(mmx, op, T_0F, 0x6E); }
void movd(const Operand& op, const Mmx& mmx) { if (!(op.isMEM() || op.isREG(32))) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0x66); opSSE(mmx, op, T_0F, 0x7E); }
void movddup(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_DUP|T_F2|T_0F|T_EW1|T_YMM|T_EVEX|T_ER_X|T_ER_Y|T_ER_Z, 0x12, isXMM_XMMorMEM, NONE); }
void movdir64b(const Reg& reg, const Address& addr) { opMR(addr, reg.cvt32(), T_66|T_0F38, 0xF8, T_APX|T_66); }
void movdiri(const Address& addr, const Reg32e& reg) { opMR(addr, reg, T_0F38, 0xF9, T_APX); }
void movdq2q(const Mmx& mmx, const Xmm& xmm) { opRR(mmx, xmm, T_F2 | T_0F, 0xD6); }
void movdqa(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_66, 0x7F); }
void movdq2q(const Mmx& mmx, const Xmm& xmm) { opSSE(mmx, xmm, T_F2 | T_0F, 0xD6); }
void movdqa(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_66, 0x7F); }
void movdqa(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, T_0F, T_66); }
void movdqu(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_F3, 0x7F); }
void movdqu(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_F3, 0x7F); }
void movdqu(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, T_0F, T_F3); }
void movhlps(const Xmm& reg1, const Xmm& reg2) { opRR(reg1, reg2, T_0F, 0x12); }
void movhlps(const Xmm& reg1, const Xmm& reg2) { opSSE(reg1, reg2, T_0F, 0x12); }
void movhpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, T_66|T_0F, 0x16); }
void movhps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, T_0F, 0x16); }
void movlhps(const Xmm& reg1, const Xmm& reg2) { opRR(reg1, reg2, T_0F, 0x16); }
void movlhps(const Xmm& reg1, const Xmm& reg2) { opSSE(reg1, reg2, T_0F, 0x16); }
void movlpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, T_66|T_0F, 0x12); }
void movlps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, T_0F, 0x12); }
void movmskpd(const Reg32e& reg, const Xmm& xmm) { db(0x66); movmskps(reg, xmm); }
void movmskps(const Reg32e& reg, const Xmm& xmm) { opRR(reg, xmm, T_0F, 0x50); }
void movntdq(const Address& addr, const Xmm& reg) { opMR(addr, Reg16(reg.getIdx()), T_0F, 0xE7); }
void movntdqa(const Xmm& xmm, const Address& addr) { opMR(addr, xmm, T_66 | T_0F38, 0x2A); }
void movmskps(const Reg32e& reg, const Xmm& xmm) { opSSE(reg, xmm, T_0F, 0x50); }
void movntdq(const Address& addr, const Xmm& reg) { if (reg.getIdx() >= 16) XBYAK_THROW(ERR_BAD_PARAMETER) opSSE(Reg16(reg.getIdx()), addr, T_0F, 0xE7); }
void movntdqa(const Xmm& xmm, const Address& addr) { opSSE(xmm, addr, T_66 | T_0F38, 0x2A); }
void movnti(const Address& addr, const Reg32e& reg) { opMR(addr, reg, T_0F, 0xC3); }
void movntpd(const Address& addr, const Xmm& reg) { opMR(addr, Reg16(reg.getIdx()), T_0F, 0x2B); }
void movntps(const Address& addr, const Xmm& xmm) { opMR(addr, Mmx(xmm.getIdx()), T_0F, 0x2B); }
void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opMR(addr, mmx, T_0F, 0xE7); }
void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opMR(addr, mmx, T_0F, mmx.isXMM() ? 0xD6 : 0x7F); }
void movq(const Mmx& mmx, const Operand& op) { if (mmx.isXMM()) db(0xF3); opRO(mmx, op, T_0F, mmx.isXMM() ? 0x7E : 0x6F, mmx.getKind() == op.getKind()); }
void movq2dq(const Xmm& xmm, const Mmx& mmx) { opRR(xmm, mmx, T_F3 | T_0F, 0xD6); }
void movntpd(const Address& addr, const Xmm& reg) { if (reg.getIdx() >= 16) XBYAK_THROW(ERR_BAD_PARAMETER) opSSE(Reg16(reg.getIdx()), addr, T_0F, 0x2B); }
void movntps(const Address& addr, const Xmm& xmm) { opSSE(Xmm(xmm.getIdx()), addr, T_0F, 0x2B); }
void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(mmx, addr, T_0F, 0xE7); }
void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(mmx, addr, T_0F, mmx.isXMM() ? 0xD6 : 0x7F); }
void movq(const Mmx& mmx, const Operand& op) { if (!op.isMEM() && mmx.getKind() != op.getKind()) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0xF3); opSSE(mmx, op, T_0F, mmx.isXMM() ? 0x7E : 0x6F); }
void movq2dq(const Xmm& xmm, const Mmx& mmx) { opSSE(xmm, mmx, T_F3 | T_0F, 0xD6); }
void movsb() { db(0xA4); }
void movsd() { db(0xA5); }
void movsd(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_F2, 0x11); }
void movsd(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_F2, 0x11); }
void movsd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, T_0F, T_F2); }
void movshdup(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F3|T_0F|T_EW0|T_YMM|T_EVEX, 0x16, isXMM_XMMorMEM, NONE); }
void movsldup(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F3|T_0F|T_EW0|T_YMM|T_EVEX, 0x12, isXMM_XMMorMEM, NONE); }
void movss(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_F3, 0x11); }
void movss(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_F3, 0x11); }
void movss(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, T_0F, T_F3); }
void movsw() { db(0x66); db(0xA5); }
void movsx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xBE); }
void movupd(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_66, 0x11); }
void movupd(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_66, 0x11); }
void movupd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, T_0F, T_66); }
void movups(const Address& addr, const Xmm& xmm) { opMR(addr, xmm, T_0F|T_NONE, 0x11); }
void movups(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_NONE, 0x11); }
void movups(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, T_0F, T_NONE); }
void movzx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xB6); }
void mpsadbw(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x42, isXMM_XMMorMEM, static_cast<uint8_t>(imm)); }
@@ -823,7 +821,7 @@ void pminsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEA); }
void pminub(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDA); }
void pminud(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x3B, isXMM_XMMorMEM); }
void pminuw(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x3A, isXMM_XMMorMEM); }
void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(reg, mmx, T_0F, 0xD7); }
void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(reg, mmx, T_0F, 0xD7); }
void pmovsxbd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N4|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x21, isXMM_XMMorMEM, NONE); }
void pmovsxbq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N2|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x22, isXMM_XMMorMEM, NONE); }
void pmovsxbw(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N8|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x20, isXMM_XMMorMEM, NONE); }
@@ -1059,10 +1057,10 @@ void vaesenc(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
void vaesenclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66|T_0F38|T_YMM|T_EVEX, 0xDD); }
void vaesimc(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38|T_W0, 0xDB); }
void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A, 0xDF, imm); }
void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x55); }
void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x55); }
void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x54); }
void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x54); }
void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x55); }
void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x55); }
void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x54); }
void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x54); }
void vbcstnebf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3|T_0F38|T_W0|T_YMM|T_B16, 0xB1); }
void vbcstnesh2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_66|T_0F38|T_W0|T_YMM|T_B16, 0xB1); }
void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM, 0x0D, imm); }
@@ -1213,7 +1211,6 @@ void vcvtneebf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3|
void vcvtneeph2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_66|T_0F38|T_W0|T_YMM, 0xB0); }
void vcvtneobf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F2|T_0F38|T_W0|T_YMM, 0xB0); }
void vcvtneoph2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38|T_W0|T_YMM, 0xB0); }
void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, T_F3|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32|orEvexIf(encoding), 0x72); }
void vcvtpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6); }
void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A); }
void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }
@@ -1226,7 +1223,7 @@ void vcvtsi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2,
void vcvtsi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F3 | T_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); }
void vcvtss2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_F3|T_0F|T_EW0|T_EVEX|T_SAE_X, 0x5A); }
void vcvtss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_ER_X | T_N8, 0x2D); }
void vcvttpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_66 | T_0F | T_YMM | T_EVEX |T_EW1 | T_B64 | T_ER_Z, 0xE6); }
void vcvttpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_66 | T_0F | T_YMM | T_EVEX |T_EW1 | T_B64 | T_SAE_Z, 0xE6); }
void vcvttps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3|T_0F|T_EW0|T_YMM|T_EVEX|T_SAE_Z|T_B32, 0x5B); }
void vcvttsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_SAE_X, 0x2C); }
void vcvttss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_SAE_X | T_N8, 0x2C); }
@@ -1239,64 +1236,64 @@ void vdpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX
void vextractf128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }
void vextracti128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); }
void vextractps(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); }
void vfmadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x98); }
void vfmadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x98); }
void vfmadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0x98); }
void vfmadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0x98); }
void vfmadd132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0x99); }
void vfmadd132ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0x99); }
void vfmadd213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xA8); }
void vfmadd213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xA8); }
void vfmadd213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xA8); }
void vfmadd213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xA8); }
void vfmadd213sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xA9); }
void vfmadd213ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0xA9); }
void vfmadd231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xB8); }
void vfmadd231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xB8); }
void vfmadd231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xB8); }
void vfmadd231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xB8); }
void vfmadd231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xB9); }
void vfmadd231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0xB9); }
void vfmaddsub132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x96); }
void vfmaddsub132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x96); }
void vfmaddsub213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xA6); }
void vfmaddsub213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xA6); }
void vfmaddsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xB6); }
void vfmaddsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xB6); }
void vfmsub132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x9A); }
void vfmsub132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x9A); }
void vfmaddsub132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0x96); }
void vfmaddsub132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0x96); }
void vfmaddsub213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xA6); }
void vfmaddsub213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xA6); }
void vfmaddsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xB6); }
void vfmaddsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xB6); }
void vfmsub132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0x9A); }
void vfmsub132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0x9A); }
void vfmsub132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0x9B); }
void vfmsub132ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0x9B); }
void vfmsub213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xAA); }
void vfmsub213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xAA); }
void vfmsub213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xAA); }
void vfmsub213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xAA); }
void vfmsub213sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xAB); }
void vfmsub213ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0xAB); }
void vfmsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xBA); }
void vfmsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xBA); }
void vfmsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xBA); }
void vfmsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xBA); }
void vfmsub231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xBB); }
void vfmsub231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0xBB); }
void vfmsubadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x97); }
void vfmsubadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x97); }
void vfmsubadd213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xA7); }
void vfmsubadd213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xA7); }
void vfmsubadd231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xB7); }
void vfmsubadd231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xB7); }
void vfnmadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x9C); }
void vfnmadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x9C); }
void vfmsubadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0x97); }
void vfmsubadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0x97); }
void vfmsubadd213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xA7); }
void vfmsubadd213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xA7); }
void vfmsubadd231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xB7); }
void vfmsubadd231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xB7); }
void vfnmadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0x9C); }
void vfnmadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0x9C); }
void vfnmadd132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0x9D); }
void vfnmadd132ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0x9D); }
void vfnmadd213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xAC); }
void vfnmadd213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xAC); }
void vfnmadd213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xAC); }
void vfnmadd213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xAC); }
void vfnmadd213sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xAD); }
void vfnmadd213ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0xAD); }
void vfnmadd231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xBC); }
void vfnmadd231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xBC); }
void vfnmadd231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xBC); }
void vfnmadd231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xBC); }
void vfnmadd231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xBD); }
void vfnmadd231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0xBD); }
void vfnmsub132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x9E); }
void vfnmsub132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x9E); }
void vfnmsub132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0x9E); }
void vfnmsub132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0x9E); }
void vfnmsub132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0x9F); }
void vfnmsub132ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0x9F); }
void vfnmsub213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xAE); }
void vfnmsub213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xAE); }
void vfnmsub213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xAE); }
void vfnmsub213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xAE); }
void vfnmsub213sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xAF); }
void vfnmsub213ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0xAF); }
void vfnmsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0xBE); }
void vfnmsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0xBE); }
void vfnmsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xBE); }
void vfnmsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xBE); }
void vfnmsub231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xBF); }
void vfnmsub231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EW0|T_EVEX|T_ER_X, 0xBF); }
void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x92, 0); }
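// Illustrative sketch for the packed FMA forms above (a hedged example, not code from this
// header): with T_ER_Z now set, the 512-bit variants accept an embedded-rounding decoration.
// It assumes xbyak's documented CodeGenerator members (zmm registers, rax, the ptr_b
// broadcast address frame and the T_rn_sae modifier); the struct name is arbitrary.
#include <xbyak/xbyak.h>
struct FmaRoundingSketch : Xbyak::CodeGenerator {
    FmaRoundingSketch()
    {
        vfnmadd231ps(zmm0, zmm1, ptr_b[rax]);      // m32bcst source, enabled by T_B32
        vfnmadd231ps(zmm0, zmm1, zmm2 | T_rn_sae); // round-to-nearest via embedded rounding
        ret();
    }
};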
@@ -1320,20 +1317,18 @@ void vmaskmovpd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_X
void vmaskmovpd(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x2D); }
void vmaskmovps(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x2E); }
void vmaskmovps(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x2C); }
void vmaxpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5F); }
void vmaxps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5F); }
void vmaxsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x5F); }
void vmaxss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x5F); }
void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5D); }
void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5D); }
void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x5D); }
void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x5D); }
void vmaxpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0x5F); }
void vmaxps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_SAE_Z | T_B32, 0x5F); }
void vmaxsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_SAE_X | T_N8, 0x5F); }
void vmaxss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_SAE_X | T_N4, 0x5F); }
void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0x5D); }
void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_SAE_Z | T_B32, 0x5D); }
void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_SAE_X | T_N8, 0x5D); }
void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_SAE_X | T_N4, 0x5D); }
void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_M_K, 0x29); }
void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX, 0x28); }
void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F|T_EW0|T_YMM|T_EVEX|T_M_K, 0x29); }
void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_EW0|T_YMM|T_EVEX, 0x28); }
void vmovd(const Operand& op, const Xmm& x) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x7E); }
void vmovd(const Xmm& x, const Operand& op) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_0F | T_66 | T_W0 | T_EVEX | T_N4, 0x6E); }
void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_DUP|T_F2|T_0F|T_EW1|T_YMM|T_EVEX|T_ER_X|T_ER_Y|T_ER_Z, 0x12); }
void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66|T_0F|T_YMM, 0x7F); }
void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_YMM, 0x6F); }
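// A hedged sketch of the vmax/vmin forms above after the switch from T_ER_Z/T_ER_X to
// T_SAE_Z/T_SAE_X: these mnemonics now take the suppress-all-exceptions decoration instead
// of a rounding mode, which matches the ISA (VMAXPS and friends define {sae} only). The
// opmask/broadcast lines are the usual EVEX syntax from xbyak's README, shown as an
// assumed, independent illustration.
#include <xbyak/xbyak.h>
struct MinMaxSaeSketch : Xbyak::CodeGenerator {
    MinMaxSaeSketch()
    {
        vmaxps(zmm0, zmm1, zmm2 | T_sae);          // {sae} on the packed form (T_SAE_Z)
        vminps(zmm3 | k1 | T_z, zmm4, ptr_b[rax]); // ordinary masking/broadcast still applies
        vminsd(xmm5, xmm6, xmm7 | T_sae);          // scalar form uses T_SAE_X
        ret();
    }
};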
@@ -1370,13 +1365,12 @@ void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_
void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX, 0x10); }
void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F|T_EW0|T_YMM|T_EVEX|T_M_K, 0x11); }
void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_EW0|T_YMM|T_EVEX, 0x10); }
void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM, 0x42, imm); }
void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x59); }
void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x59); }
void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x59); }
void vmulss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x59); }
void vorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x56); }
void vorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x56); }
void vorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x56); }
void vorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x56); }
void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38|T_YMM|T_EVEX, 0x1C); }
void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38|T_EW0|T_YMM|T_EVEX|T_B32, 0x1E); }
void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38|T_YMM|T_EVEX, 0x1D); }
@@ -1421,22 +1415,10 @@ void vpcmpgtq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1
void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0x65); }
void vpcmpistri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A, 0x63, imm); }
void vpcmpistrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A, 0x62, imm); }
void vpdpbssd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F38|T_W0|T_YMM, 0x50); }
void vpdpbssds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F38|T_W0|T_YMM, 0x51); }
void vpdpbsud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0x50); }
void vpdpbsuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0x51); }
void vpdpbusd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32, 0x50, encoding); }
void vpdpbusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32, 0x51, encoding); }
void vpdpbuud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38|T_W0|T_YMM, 0x50); }
void vpdpbuuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38|T_W0|T_YMM, 0x51); }
void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32, 0x52, encoding); }
void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_B32, 0x53, encoding); }
void vpdpwsud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0xD2); }
void vpdpwsuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0xD3); }
void vpdpwusd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM, 0xD2); }
void vpdpwusds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM, 0xD3); }
void vpdpwuud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38|T_W0|T_YMM, 0xD2); }
void vpdpwuuds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38|T_W0|T_YMM, 0xD3); }
void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); }
void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); }
void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66|T_0F38|T_W0|T_EW0|T_YMM|T_EVEX|T_B32, 0x36); }
@@ -1468,8 +1450,6 @@ void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if
void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x22, imm); }
void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); }
void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); }
void vpmadd52huq(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_B64, 0xB5, encoding); }
void vpmadd52luq(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_B64, 0xB4, encoding); }
void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM|T_EVEX, 0x04); }
void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xF5); }
void vpmaskmovd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x8E); }
@@ -1593,8 +1573,8 @@ void vunpckhpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x
void vunpckhps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F|T_EW0|T_YMM|T_EVEX|T_B32, 0x15); }
void vunpcklpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64, 0x14); }
void vunpcklps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F|T_EW0|T_YMM|T_EVEX|T_B32, 0x14); }
void vxorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x57); }
void vxorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x57); }
void vxorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x57); }
void vxorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x57); }
void vzeroall() { db(0xC5); db(0xFC); db(0x77); }
void vzeroupper() { db(0xC5); db(0xF8); db(0x77); }
void wait() { db(0x9B); }
@@ -1612,6 +1592,8 @@ void xor_(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, T_N
void xor_(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, T_NF|T_CODE1_IF1, 0x30); }
void xorpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x57, isXMM_XMMorMEM); }
void xorps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x57, isXMM_XMMorMEM); }
void xresldtrk() { db(0xF2); db(0x0F); db(0x01); db(0xE9); }
void xsusldtrk() { db(0xF2); db(0x0F); db(0x01); db(0xE8); }
#ifdef XBYAK_ENABLE_OMITTED_OPERAND
void vblendpd(const Xmm& x, const Operand& op, uint8_t imm) { vblendpd(x, x, op, imm); }
void vblendps(const Xmm& x, const Operand& op, uint8_t imm) { vblendps(x, x, op, imm); }
@@ -1892,8 +1874,8 @@ void testui() { db(0xF3); db(0x0F); db(0x01); db(0xED); }
void uiret() { db(0xF3); db(0x0F); db(0x01); db(0xEC); }
void cmpxchg16b(const Address& addr) { opMR(addr, Reg64(1), T_0F, 0xC7); }
void fxrstor64(const Address& addr) { opMR(addr, Reg64(1), T_0F, 0xAE); }
void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x7E); }
void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opRR(mmx, reg, T_0F, 0x6E); }
void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(mmx, reg, T_0F, 0x7E); }
void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opSSE(mmx, reg, T_0F, 0x6E); }
void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opRO(reg, op, 0, 0x63); }
void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x16, 0, imm); }
void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x22, 0, imm); }
@@ -1935,6 +1917,10 @@ void aesencwide128kl(const Address& addr) { opSSE_APX(xmm0, addr, T_F3|T_0F38, 0
void aesencwide256kl(const Address& addr) { opSSE_APX(xmm2, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); }
void encodekey128(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFA, 0xDA); }
void encodekey256(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFB, 0xDB); }
void rdfsbase(const Reg32e& r) { opRR(eax, r, T_F3|T_0F, 0xAE); }
void rdgsbase(const Reg32e& r) { opRR(ecx, r, T_F3|T_0F, 0xAE); }
void wrfsbase(const Reg32e& r) { opRR(edx, r, T_F3|T_0F, 0xAE); }
void wrgsbase(const Reg32e& r) { opRR(ebx, r, T_F3|T_0F, 0xAE); }
void ldtilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_0F38|T_W0, 0x49); }
void sttilecfg(const Address& addr) { if (opROO(Reg(), addr, tmm0, T_APX|T_66|T_0F38|T_W0, 0x49)) return; opVex(tmm0, &tmm0, addr, T_66|T_0F38 | T_W0, 0x49); }
void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2|T_0F38|T_W0, 0x4B); }
@@ -2041,6 +2027,7 @@ void v4fmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM
void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0x9B); }
void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0xAA); }
void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); }
void vaddnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x58); }
void vaddph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x58); }
void vaddsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x58); }
void valignd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x03, imm); }
@@ -2169,6 +2156,7 @@ void vcmpordpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x,
void vcmpordps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 7); }
void vcmpordsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 7); }
void vcmpordss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 7); }
void vcmppbf16(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opVex(k, &x, op, T_MUST_EVEX|T_F2|T_0F3A|T_EW0|T_YMM|T_B16, 0xC2, imm); }
void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0xC2, imm); }
void vcmpph(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0xC2, imm); }
void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_0F|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0xC2, imm); }
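// A hedged sketch of how the opmask-producing compares above are typically consumed; the
// predicate immediate (2 == LE_OS) and the masked move that follows are illustrative
// choices, assuming xbyak's documented k-register and T_z members.
#include <xbyak/xbyak.h>
struct CmpMaskSketch : Xbyak::CodeGenerator {
    CmpMaskSketch()
    {
        vcmpps(k2, zmm0, ptr_b[rax], 2);   // k2 = zmm0 <= [rax]{1to16}, via the T_B32 form
        vmovaps(zmm1 | k2 | T_z, zmm2);    // consume the mask with zeroing
        ret();
    }
};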
@@ -2191,18 +2179,39 @@ void vcmpunordpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x
void vcmpunordps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 3); }
void vcmpunordsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 3); }
void vcmpunordss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 3); }
void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2F); }
void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x63); }
void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2F); }
void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x8A); }
void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8A); }
void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x63); }
void vcomsbf16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_66|T_MAP5|T_EW0|T_MUST_EVEX, 0x2F); }
void vcomxsd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8|T_F2|T_0F|T_EW1|T_SAE_X|T_MUST_EVEX, 0x2F); }
void vcomxsh(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_F3|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2F); }
void vcomxss(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4|T_F3|T_0F|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2F); }
void vcvt2ps2phx(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_B32, 0x67); }
void vcvtbiasph2bf8(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt6(x1, x2, op, T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x74); }
void vcvtbiasph2bf8s(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt6(x1, x2, op, T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x74); }
void vcvtbiasph2hf8(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt6(x1, x2, op, T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x18); }
void vcvtbiasph2hf8s(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt6(x1, x2, op, T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x1B); }
void vcvtdq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16|T_N_VL|T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x5B); }
void vcvthf82ph(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_MUST_EVEX | T_F2 | T_MAP5 | T_EW0 | T_YMM | T_N1, 0x1E); }
void vcvtne2ph2bf8(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N1|T_F2|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x74); }
void vcvtne2ph2bf8s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N1|T_F2|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x74); }
void vcvtne2ph2hf8(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N1|T_F2|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x18); }
void vcvtne2ph2hf8s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N1|T_F2|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x1B); }
void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x72); }
void vcvtnebf162ibs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x69); }
void vcvtnebf162iubs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x6B); }
void vcvtneph2bf8(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x74); }
void vcvtneph2bf8s(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x74); }
void vcvtneph2hf8(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x18); }
void vcvtneph2hf8s(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x1B); }
void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x72); }
void vcvtpd2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16|T_N_VL|T_66|T_MAP5|T_EW1|T_ER_Z|T_MUST_EVEX|T_B64, 0x5A); }
void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x7B); }
void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x79); }
void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x79); }
void vcvtph2dq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_MAP5|T_EW0|T_YMM|T_ER_Y|T_MUST_EVEX|T_B16, 0x5B); }
void vcvtph2ibs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_B16, 0x69); }
void vcvtph2iubs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_B16, 0x6B); }
void vcvtph2pd(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_MAP5|T_EW0|T_YMM|T_SAE_X|T_MUST_EVEX|T_B16, 0x5A); }
void vcvtph2psx(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_MAP6|T_EW0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B16, 0x13); }
void vcvtph2qq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_66|T_MAP5|T_EW0|T_YMM|T_ER_X|T_MUST_EVEX|T_B16, 0x7B); }
@@ -2210,6 +2219,8 @@ void vcvtph2udq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0,
void vcvtph2uqq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_66|T_MAP5|T_EW0|T_YMM|T_ER_X|T_MUST_EVEX|T_B16, 0x79); }
void vcvtph2uw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x7D); }
void vcvtph2w(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x7D); }
void vcvtps2ibs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW0|T_YMM|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_B32, 0x69); }
void vcvtps2iubs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW0|T_YMM|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_B32, 0x6B); }
void vcvtps2phx(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16|T_N_VL|T_66|T_MAP5|T_EW0|T_ER_Z|T_MUST_EVEX|T_B32, 0x1D); }
void vcvtps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_0F|T_EW0|T_YMM|T_ER_Y|T_MUST_EVEX|T_B32, 0x7B); }
void vcvtps2udq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x79); }
@@ -2226,22 +2237,40 @@ void vcvtsh2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N2|T_F3
void vcvtsi2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) uint64_t type = (T_F3|T_MAP5|T_ER_R|T_MUST_EVEX|T_M_K) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x2A); }
void vcvtss2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_MAP5|T_EW0|T_ER_X|T_MUST_EVEX, 0x1D); }
void vcvtss2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N4|T_F3|T_0F|T_ER_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x79); }
void vcvttnebf162ibs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x68); }
void vcvttnebf162iubs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x6A); }
void vcvttpd2dqs(const Xmm& x, const Operand& op) { opCvt2(x, op, T_MAP5|T_EW1|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B64, 0x6D); }
void vcvttpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x7A); }
void vcvttpd2qqs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW1|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B64, 0x6D); }
void vcvttpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x78); }
void vcvttpd2udqs(const Xmm& x, const Operand& op) { opCvt2(x, op, T_MAP5|T_EW1|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B64, 0x6C); }
void vcvttpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x78); }
void vcvttpd2uqqs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW1|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B64, 0x6C); }
void vcvttph2dq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_F3|T_MAP5|T_EW0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B16, 0x5B); }
void vcvttph2ibs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_B16, 0x68); }
void vcvttph2iubs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_B16, 0x6A); }
void vcvttph2qq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_66|T_MAP5|T_EW0|T_YMM|T_SAE_X|T_MUST_EVEX|T_B16, 0x7A); }
void vcvttph2udq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_MAP5|T_EW0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B16, 0x78); }
void vcvttph2uqq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_66|T_MAP5|T_EW0|T_YMM|T_SAE_X|T_MUST_EVEX|T_B16, 0x78); }
void vcvttph2uw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x7C); }
void vcvttph2w(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x7C); }
void vcvttps2dqs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B32, 0x6D); }
void vcvttps2ibs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW0|T_YMM|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_B32, 0x68); }
void vcvttps2iubs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW0|T_YMM|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_B32, 0x6A); }
void vcvttps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_0F|T_EW0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B32, 0x7A); }
void vcvttps2qqs(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_MAP5|T_EW0|T_YMM|T_SAE_X|T_SAE_Y|T_MUST_EVEX|T_B32, 0x6D); }
void vcvttps2udq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x78); }
void vcvttps2udqs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B32, 0x6C); }
void vcvttps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_0F|T_EW0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B32, 0x78); }
void vcvttps2uqqs(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_MAP5|T_EW0|T_YMM|T_SAE_X|T_SAE_Y|T_MUST_EVEX|T_B32, 0x6C); }
void vcvttsd2sis(const Reg32e& r, const Operand& op) { uint64_t type = (T_N8|T_F2|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x6D); }
void vcvttsd2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N8|T_F2|T_0F|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x78); }
void vcvttsd2usis(const Reg32e& r, const Operand& op) { uint64_t type = (T_N8|T_F2|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x6C); }
void vcvttsh2si(const Reg32e& r, const Operand& op) { uint64_t type = (T_N2|T_F3|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x2C); }
void vcvttsh2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N2|T_F3|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x78); }
void vcvttss2sis(const Reg32e& r, const Operand& op) { uint64_t type = (T_N4|T_F3|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x6D); }
void vcvttss2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N4|T_F3|T_0F|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x78); }
void vcvttss2usis(const Reg32e& r, const Operand& op) { uint64_t type = (T_N4|T_F3|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x6C); }
void vcvtudq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_F3|T_0F|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x7A); }
void vcvtudq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16|T_N_VL|T_F2|T_MAP5|T_EW0|T_ER_Z|T_MUST_EVEX|T_B32, 0x7A); }
void vcvtudq2ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_0F|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x7A); }
@@ -2254,9 +2283,11 @@ void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2
void vcvtuw2ph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x7D); }
void vcvtw2ph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3|T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x7D); }
void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x42, imm); }
void vdivnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x5E); }
void vdivph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5E); }
void vdivsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5E); }
void vdpbf16ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x52); }
void vdpphps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x52); }
void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }
void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }
void vexpandpd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x88); }
@@ -2275,38 +2306,51 @@ void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) {
void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x54, imm); }
void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F3A|T_EW1|T_SAE_Z|T_MUST_EVEX, 0x55, imm); }
void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_EW0|T_SAE_Z|T_MUST_EVEX, 0x55, imm); }
void vfmadd132nepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x98); }
void vfmadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x98); }
void vfmadd132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0x99); }
void vfmadd213nepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0xA8); }
void vfmadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xA8); }
void vfmadd213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xA9); }
void vfmadd231nepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0xB8); }
void vfmadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xB8); }
void vfmadd231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xB9); }
void vfmaddcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x56); }
void vfmaddsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x96); }
void vfmaddsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xA6); }
void vfmaddsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xB6); }
void vfmsub132nepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x9A); }
void vfmsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x9A); }
void vfmsub132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0x9B); }
void vfmsub213nepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0xAA); }
void vfmsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xAA); }
void vfmsub213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xAB); }
void vfmsub231nepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0xBA); }
void vfmsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xBA); }
void vfmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xBB); }
void vfmsubadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x97); }
void vfmsubadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xA7); }
void vfmsubadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xB7); }
void vfmulcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0xD6); }
void vfnmadd132nepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x9C); }
void vfnmadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x9C); }
void vfnmadd132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0x9D); }
void vfnmadd213nepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0xAC); }
void vfnmadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xAC); }
void vfnmadd213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xAD); }
void vfnmadd231nepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0xBC); }
void vfnmadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xBC); }
void vfnmadd231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xBD); }
void vfnmsub132nepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x9E); }
void vfnmsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x9E); }
void vfnmsub132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0x9F); }
void vfnmsub213nepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0xAE); }
void vfnmsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xAE); }
void vfnmsub213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xAF); }
void vfnmsub231nepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0xBE); }
void vfnmsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xBE); }
void vfnmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_ER_X|T_MUST_EVEX, 0xBF); }
void vfpclasspbf16(const Opmask& k, const Operand& op, uint8_t imm) { opVex(k.changeBit(op.getBit()), 0, op, T_MUST_EVEX|T_F2|T_0F3A|T_EW0|T_YMM|T_B16, 0x66, imm); }
void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
void vfpclassph(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B16, 0x66, imm); }
void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
@@ -2325,12 +2369,14 @@ void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8|T_66|T_0
void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_N4|T_66|T_0F38|T_EW0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); }
void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_VSIB, 0x93, 0); }
void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_VSIB, 0x93, 2); }
void vgetexppbf16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x42); }
void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x42); }
void vgetexpph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP6|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x42); }
void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x42); }
void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_EW1|T_SAE_X|T_MUST_EVEX, 0x43); }
void vgetexpsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_SAE_X|T_MUST_EVEX, 0x43); }
void vgetexpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_EW0|T_SAE_X|T_MUST_EVEX, 0x43); }
void vgetmantpbf16(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_F2|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x26, imm); }
void vgetmantpd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x26, imm); }
void vgetmantph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x26, imm); }
void vgetmantps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x26, imm); }
@@ -2345,10 +2391,19 @@ void vinserti32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm)
void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32|T_66|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX, 0x3A, imm); }
void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x38, imm); }
void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x3A, imm); }
void vmaxph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5F); }
void vmaxsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5F); }
void vminph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5D); }
void vminsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5D); }
void vmaxpbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x5F); }
void vmaxph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_SAE_Z | T_B16, 0x5F); }
void vmaxsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_SAE_X | T_N2, 0x5F); }
void vminmaxnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x52, imm); }
void vminmaxpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B64, 0x52, imm); }
void vminmaxph(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_0F3A|T_EW0|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B16, 0x52, imm); }
void vminmaxps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW0|T_YMM|T_SAE_Y|T_SAE_Z|T_MUST_EVEX|T_B32, 0x52, imm); }
void vminmaxsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F3A|T_EW1|T_SAE_X|T_MUST_EVEX, 0x53, imm); }
void vminmaxsh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2|T_0F3A|T_EW0|T_SAE_X|T_MUST_EVEX, 0x53, imm); }
void vminmaxss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_EW0|T_SAE_X|T_MUST_EVEX, 0x53, imm); }
void vminpbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x5D); }
void vminph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_SAE_Z | T_B16, 0x5D); }
void vminsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_SAE_X | T_N2, 0x5D); }
void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66|T_0F|T_EW0|T_YMM|T_ER_X|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_M_K, 0x7F); }
void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW0|T_YMM|T_ER_X|T_ER_Y|T_ER_Z|T_MUST_EVEX, 0x6F); }
void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66|T_0F|T_EW1|T_YMM|T_ER_X|T_ER_Y|T_ER_Z|T_MUST_EVEX|T_M_K, 0x7F); }
@@ -2364,9 +2419,8 @@ void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_0F
void vmovsh(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX|T_M_K, 0x11); }
void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX, 0x10); }
void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, T_N2|T_F3|T_MAP5|T_EW0|T_MUST_EVEX, 0x10); }
void vmovw(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); }
void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x7E); }
void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_N2|T_66|T_MAP5|T_MUST_EVEX, 0x6E); }
void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F3A|T_YMM, 0x42, encoding, imm, T_66|T_W0|T_YMM, T_F3|T_0F3A|T_EW0|T_B32, 1); }
void vmulnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x59); }
void vmulph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x59); }
void vmulsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x59); }
void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }
@@ -2403,10 +2457,24 @@ void vpcmpud(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { op
void vpcmpuq(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x1E, imm); }
void vpcmpuw(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x3E, imm); }
void vpcmpw(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x3F, imm); }
void vpcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x63); }
void vpcompressd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8B); }
void vpcompressq(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x8B); }
void vpcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x63); }
void vpconflictd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0xC4); }
void vpconflictq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xC4); }
void vpdpbssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F2|T_0F38|T_YMM, 0x50, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
void vpdpbssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F2|T_0F38|T_YMM, 0x51, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
void vpdpbsud(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F3|T_0F38|T_YMM, 0x50, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
void vpdpbsuds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F3|T_0F38|T_YMM, 0x51, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
void vpdpbuud(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F38|T_YMM, 0x50, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
void vpdpbuuds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F38|T_YMM, 0x51, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
void vpdpwsud(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F3|T_0F38|T_YMM, 0xD2, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
void vpdpwsuds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F3|T_0F38|T_YMM, 0xD3, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
void vpdpwusd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_YMM, 0xD2, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
void vpdpwusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_YMM, 0xD3, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
void vpdpwuud(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F38|T_YMM, 0xD2, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
void vpdpwuuds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F38|T_YMM, 0xD3, encoding, NONE, T_W0, T_EW0|T_B32, 1); }
void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x8D); }
void vpermi2b(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX, 0x75); }
void vpermi2d(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x76); }
@@ -2431,6 +2499,8 @@ void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4|T_6
void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_VSIB, 0x91, 0); }
void vplzcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW0|T_YMM|T_MUST_EVEX|T_B32, 0x44); }
void vplzcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x44); }
void vpmadd52huq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xB5); }
void vpmadd52luq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xB4); }
void vpmaxsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x3D); }
void vpmaxuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x3F); }
void vpminsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x39); }
@@ -2524,14 +2594,17 @@ void vrcp28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_
void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); }
void vrcp28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_EW1|T_SAE_X|T_MUST_EVEX, 0xCB); }
void vrcp28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_EW0|T_SAE_X|T_MUST_EVEX, 0xCB); }
void vrcppbf16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x4C); }
void vrcpph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x4C); }
void vrcpsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_MUST_EVEX, 0x4D); }
void vreducenepbf16(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_F2|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x56, imm); }
void vreducepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x56, imm); }
void vreduceph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x56, imm); }
void vreduceps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x56, imm); }
void vreducesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F3A|T_EW1|T_SAE_X|T_MUST_EVEX, 0x57, imm); }
void vreducesh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2|T_0F3A|T_EW0|T_SAE_X|T_MUST_EVEX, 0x57, imm); }
void vreducess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_EW0|T_SAE_X|T_MUST_EVEX, 0x57, imm); }
void vrndscalenepbf16(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_F2|T_0F3A|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x08, imm); }
void vrndscalepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x09, imm); }
void vrndscaleph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x08, imm); }
void vrndscaleps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x08, imm); }
@@ -2546,8 +2619,11 @@ void vrsqrt28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 |
void vrsqrt28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCC); }
void vrsqrt28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_EW1|T_SAE_X|T_MUST_EVEX, 0xCD); }
void vrsqrt28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_EW0|T_SAE_X|T_MUST_EVEX, 0xCD); }
void vrsqrtpbf16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x4E); }
void vrsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x4E); }
void vrsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_EW0|T_MUST_EVEX, 0x4F); }
void vscalefpbf16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x2C); }
void vscalefpbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x2C); }
void vscalefpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x2C); }
void vscalefph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x2C); }
void vscalefps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x2C); }
@@ -2570,11 +2646,16 @@ void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) {
void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }
void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }
void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }
void vsqrtnepbf16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x51); }
void vsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_EW0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x51); }
void vsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_F3|T_MAP5|T_EW0|T_ER_X|T_MUST_EVEX, 0x51); }
void vsubnepbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_EW0|T_YMM|T_MUST_EVEX|T_B16, 0x5C); }
void vsubph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5C); }
void vsubsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5C); }
void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }
void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2E); }
void vucomxsd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8|T_F2|T_0F|T_EW1|T_SAE_X|T_MUST_EVEX, 0x2E); }
void vucomxsh(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_F3|T_MAP5|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2E); }
void vucomxss(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4|T_F3|T_0F|T_EW0|T_SAE_X|T_MUST_EVEX, 0x2E); }
#ifdef XBYAK64
void kmovq(const Reg64& r, const Opmask& k) { opKmov(k, r, true, 64); }
void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, 0, r, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x7C); }
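
Illustrative note, not part of this changeset: the mnemonics added above are plain CodeGenerator member functions, so once the updated header is in place they are emitted like any other xbyak instruction. The class name and register choices below are assumptions for the sketch only, and the generated code will only run on hardware that reports the matching ISA extensions.

#include <xbyak/xbyak.h>

// Minimal sketch, assuming a 64-bit build (XBYAK64).
struct NewInsnDemo : Xbyak::CodeGenerator
{
    NewInsnDemo()
    {
        vpdpbssd(xmm0, xmm1, xmm2);   // INT8 dot-product form declared above
        vrcpph(xmm3, ptr[rax]);       // FP16 reciprocal approximation with a memory source
        vpcompressw(ptr[rbx], xmm4);  // compress word elements to memory
        ret();
    }
};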

View File

@@ -547,6 +547,7 @@ public:
XBYAK_DEFINE_TYPE(87, tKEYLOCKER_WIDE);
XBYAK_DEFINE_TYPE(88, tSSE4a);
XBYAK_DEFINE_TYPE(89, tCLWB);
XBYAK_DEFINE_TYPE(90, tTSXLDTRK);
#undef XBYAK_SPLIT_ID
#undef XBYAK_DEFINE_TYPE
@@ -684,6 +685,7 @@ public:
if (ECX & (1U << 28)) type_ |= tMOVDIR64B;
if (EDX & (1U << 5)) type_ |= tUINTR;
if (EDX & (1U << 14)) type_ |= tSERIALIZE;
if (EDX & (1U << 16)) type_ |= tTSXLDTRK;
if (EDX & (1U << 22)) type_ |= tAMX_BF16;
if (EDX & (1U << 24)) type_ |= tAMX_TILE;
if (EDX & (1U << 25)) type_ |= tAMX_INT8;
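
For reference only: the new tTSXLDTRK bit added above follows the existing Xbyak::util::Cpu feature-query pattern, so callers can gate TSX suspend-load-address-tracking paths on it at runtime. The small program below is a sketch under that assumption, not code from this change.

#include <xbyak/xbyak_util.h>
#include <cstdio>

int main()
{
    const Xbyak::util::Cpu cpu;
    // tTSXLDTRK is reported when CPUID(EAX=7, ECX=0).EDX bit 16 is set, as wired up in the hunk above.
    std::printf("TSXLDTRK supported: %s\n",
                cpu.has(Xbyak::util::Cpu::tTSXLDTRK) ? "yes" : "no");
    return 0;
}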

View File

@@ -2615,7 +2615,7 @@ SCED-50642:
gameFixes:
- SoftwareRendererFMVHack # Fixes interlacing.
roundModes:
eeRoundMode: 1 # Fixes reverse control and boss in some places.
eeRoundMode: 2 # Fixes reverse control and boss in some places.
clampModes:
eeClampMode: 3 # Fixes animations.
gsHWFixes:
@@ -2742,7 +2742,7 @@ SCED-50907:
gameFixes:
- SoftwareRendererFMVHack # Fixes interlacing.
roundModes:
eeRoundMode: 1 # Fixes reverse control and boss in some places.
eeRoundMode: 2 # Fixes reverse control and boss in some places.
clampModes:
eeClampMode: 3 # Fixes animations.
gsHWFixes:
@@ -4451,7 +4451,7 @@ SCES-50490:
gameFixes:
- SoftwareRendererFMVHack # Fixes interlacing.
roundModes:
eeRoundMode: 1 # Fixes reverse control and boss in some places.
eeRoundMode: 2 # Fixes reverse control and boss in some places.
clampModes:
eeClampMode: 3 # Fixes animations.
gsHWFixes:
@@ -4463,7 +4463,7 @@ SCES-50491:
gameFixes:
- SoftwareRendererFMVHack # Fixes interlacing.
roundModes:
eeRoundMode: 1 # Fixes reverse control and boss in some places.
eeRoundMode: 2 # Fixes reverse control and boss in some places.
clampModes:
eeClampMode: 3 # Fixes animations.
gsHWFixes:
@@ -4476,7 +4476,7 @@ SCES-50492:
gameFixes:
- SoftwareRendererFMVHack # Fixes interlacing.
roundModes:
eeRoundMode: 1 # Fixes reverse control and boss in some places.
eeRoundMode: 2 # Fixes reverse control and boss in some places.
clampModes:
eeClampMode: 3 # Fixes animations.
gsHWFixes:
@@ -4488,7 +4488,7 @@ SCES-50493:
gameFixes:
- SoftwareRendererFMVHack # Fixes interlacing.
roundModes:
eeRoundMode: 1 # Fixes reverse control and boss in some places.
eeRoundMode: 2 # Fixes reverse control and boss in some places.
clampModes:
eeClampMode: 3 # Fixes animations.
gsHWFixes:
@@ -4500,7 +4500,7 @@ SCES-50494:
gameFixes:
- SoftwareRendererFMVHack # Fixes interlacing.
roundModes:
eeRoundMode: 1 # Fixes reverse control and boss in some places.
eeRoundMode: 2 # Fixes reverse control and boss in some places.
clampModes:
eeClampMode: 3 # Fixes animations.
gsHWFixes:
@@ -29780,12 +29780,12 @@ SLKA-25214:
name: "Final Fantasy X - International [PlayStation 2 - Big Hit Series]"
region: "NTSC-K"
roundModes:
eeRoundMode: 1 # Fixes reverse control and boss in some places.
eeRoundMode: 2 # Fixes reverse control and boss in some places.
clampModes:
eeClampMode: 3 # Fixes animations.
vu0ClampMode: 3 # Fixes character flickering caused by EE clamp full.
gsHWFixes:
roundSprite: 2 # Fixes font artifacts.
autoFlush: 1 # Fixes blur effect on attacks.
SLKA-25215:
name: "Shining Wind"
region: "NTSC-K"
@@ -37641,12 +37641,12 @@ SLPM-65115:
name-en: "Final Fantasy X International"
region: "NTSC-J"
roundModes:
eeRoundMode: 1 # Fixes reverse control and boss in some places.
eeRoundMode: 2 # Fixes reverse control and boss in some places.
clampModes:
eeClampMode: 3 # Fixes animations.
vu0ClampMode: 3 # Fixes character flickering caused by EE clamp full.
gsHWFixes:
roundSprite: 2 # Fixes font artifacts.
autoFlush: 1 # Fixes blur effect on attacks.
SLPM-65116:
name: "リリーのアトリエ プラス 〜ザールブルグの錬金術士3〜"
name-sort: "りりーのあとりえ ぷらす 〜ざーるぶるぐのれんきんじゅつし3〜"
@@ -43332,7 +43332,7 @@ SLPM-66124:
gameFixes:
- SoftwareRendererFMVHack # Fixes interlacing.
roundModes:
eeRoundMode: 1 # Fixes reverse control and boss in some places.
eeRoundMode: 2 # Fixes reverse control and boss in some places.
clampModes:
eeClampMode: 3 # Fixes animations.
gsHWFixes:
@@ -46745,12 +46745,12 @@ SLPM-66677:
name-en: "Final Fantasy X - International [Ultimate Hits]"
region: "NTSC-J"
roundModes:
eeRoundMode: 1 # Fixes reverse control and boss in some places.
eeRoundMode: 2 # Fixes reverse control and boss in some places.
clampModes:
eeClampMode: 3 # Fixes animations.
vu0ClampMode: 3 # Fixes character flickering caused by EE clamp full.
gsHWFixes:
roundSprite: 2 # Fixes font artifacts.
autoFlush: 1 # Fixes blur effect on attacks.
SLPM-66678:
name: "ファイナルファンタジーX-2 インターナショナル+ラストミッション [アルティメットヒッツ]"
name-sort: "ふぁいなるふぁんたじー10-2 いんたーなしょなる+らすとみっしょん [あるてぃめっとひっつ]"
@@ -48710,12 +48710,12 @@ SLPM-67513:
name: "Final Fantasy X International"
region: "NTSC-K"
roundModes:
eeRoundMode: 1 # Fixes reverse control and boss in some places.
eeRoundMode: 2 # Fixes reverse control and boss in some places.
clampModes:
eeClampMode: 3 # Fixes animations.
vu0ClampMode: 3 # Fixes character flickering caused by EE clamp full.
gsHWFixes:
roundSprite: 2 # Fixes font artifacts.
autoFlush: 1 # Fixes blur effect on attacks.
SLPM-67514:
name: "Kessen"
region: "NTSC-K"
@@ -52241,7 +52241,7 @@ SLPS-25050:
gameFixes:
- SoftwareRendererFMVHack # Fixes interlacing.
roundModes:
eeRoundMode: 1 # Fixes reverse control and boss in some places.
eeRoundMode: 2 # Fixes reverse control and boss in some places.
clampModes:
eeClampMode: 3 # Fixes animations.
gsHWFixes:
@@ -52448,12 +52448,12 @@ SLPS-25088:
region: "NTSC-J"
compat: 5
roundModes:
eeRoundMode: 1 # Fixes reverse control and boss in some places.
eeRoundMode: 2 # Fixes reverse control and boss in some places.
clampModes:
eeClampMode: 3 # Fixes animations.
vu0ClampMode: 3 # Fixes character flickering caused by EE clamp full.
gsHWFixes:
roundSprite: 2 # Fixes font artifacts.
autoFlush: 1 # Fixes blur effect on attacks.
SLPS-25089:
name: "Salt Lake 2002"
region: "NTSC-J"
@@ -57535,7 +57535,7 @@ SLPS-72501:
gameFixes:
- SoftwareRendererFMVHack # Fixes interlacing.
roundModes:
eeRoundMode: 1 # Fixes reverse control and boss in some places.
eeRoundMode: 2 # Fixes reverse control and boss in some places.
clampModes:
eeClampMode: 3 # Fixes animations.
gsHWFixes:
@@ -59686,7 +59686,7 @@ SLUS-20312:
gameFixes:
- SoftwareRendererFMVHack # Fixes interlacing.
roundModes:
eeRoundMode: 1 # Fixes reverse control and boss in some places.
eeRoundMode: 2 # Fixes reverse control and boss in some places.
clampModes:
eeClampMode: 3 # Fixes animations.
gsHWFixes:

View File

@@ -1911,11 +1911,6 @@ void MainWindow::onInputRecStopActionTriggered()
}
}
void MainWindow::onInputRecOpenSettingsTriggered()
{
// TODO - Vaser - Implement
}
InputRecordingViewer* MainWindow::getInputRecordingViewer()
{
if (!m_input_recording_viewer)

View File

@@ -179,7 +179,6 @@ private Q_SLOTS:
void onInputRecNewActionTriggered();
void onInputRecPlayActionTriggered();
void onInputRecStopActionTriggered();
void onInputRecOpenSettingsTriggered();
void onInputRecOpenViewer();
void onVMStarting();

View File

@@ -196,7 +196,6 @@
<addaction name="actionInputRecStop"/>
<addaction name="separator"/>
<addaction name="actionInputRecOpenViewer"/>
<addaction name="actionInputRecOpenSettings"/>
<addaction name="separator"/>
<addaction name="actionInputRecConsoleLogs"/>
<addaction name="actionInputRecControllerLogs"/>
@@ -981,14 +980,6 @@
<string extracomment="This section refers to the Input Recording submenu.">Stop</string>
</property>
</action>
<action name="actionInputRecOpenSettings">
<property name="enabled">
<bool>false</bool>
</property>
<property name="text">
<string extracomment="This section refers to the Input Recording submenu.">Settings</string>
</property>
</action>
<action name="actionRecording_Console_Logs">
<property name="text">
<string>Input Recording Logs</string>

View File

@@ -56,6 +56,9 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(SettingsWindow* dialog, QWidget*
SettingWidgetBinder::BindWidgetToFloatSetting(sif, m_ui.ntscFrameRate, "EmuCore/GS", "FramerateNTSC", 59.94f);
SettingWidgetBinder::BindWidgetToFloatSetting(sif, m_ui.palFrameRate, "EmuCore/GS", "FrameratePAL", 50.00f);
dialog->registerWidgetHelp(m_ui.savestateSelector, tr("Use Save State Selector"), tr("Checked"),
tr("Show a save state selector UI when switching slots instead of showing a notification bubble."));
SettingWidgetBinder::BindWidgetToIntSetting(
sif, m_ui.savestateCompressionMethod, "EmuCore", "SavestateCompressionType", static_cast<int>(SavestateCompressionMethod::Zstandard));
@@ -67,6 +70,7 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(SettingsWindow* dialog, QWidget*
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.backupSaveStates, "EmuCore", "BackupSavestate", true);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.saveStateOnShutdown, "EmuCore", "SaveStateOnShutdown", false);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.savestateSelector, "EmuCore", "UseSavestateSelector", true);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.pineEnable, "EmuCore", "EnablePINE", false);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.pineSlot, "EmuCore", "PINESlot", 28011);

View File

@@ -32,9 +32,9 @@
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<y>-447</y>
<width>790</width>
<height>1023</height>
<height>1049</height>
</rect>
</property>
<layout class="QVBoxLayout" name="verticalLayout">
@@ -422,10 +422,10 @@
<string>Savestate Settings</string>
</property>
<layout class="QGridLayout" name="savestateSettingsLayout">
<item row="0" column="0">
<widget class="QLabel" name="savestateCompressionLabel">
<item row="3" column="1">
<widget class="QCheckBox" name="saveStateOnShutdown">
<property name="text">
<string>Compression Method:</string>
<string>Save State On Shutdown</string>
</property>
</widget>
</item>
@@ -491,10 +491,17 @@
</property>
</widget>
</item>
<item row="3" column="1">
<widget class="QCheckBox" name="saveStateOnShutdown">
<item row="0" column="0">
<widget class="QLabel" name="savestateCompressionLabel">
<property name="text">
<string>Save State On Shutdown</string>
<string>Compression Method:</string>
</property>
</widget>
</item>
<item row="4" column="0">
<widget class="QCheckBox" name="savestateSelector">
<property name="text">
<string>Use Save State Selector</string>
</property>
</widget>
</item>

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large

View File

@@ -1,7 +1,17 @@
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Uploaded to: SVG Repo, www.svgrepo.com, Transformed by: SVG Repo Mixer Tools -->
<svg width="256px" height="256px" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg" fill="#000000" class="bi bi-camera-video">
<g id="SVGRepo_bgCarrier" stroke-width="0"/>
<g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"/>
<?xml version="1.0" encoding="UTF-8"?>
<svg id="camera_video_-_PCSX2" data-name="camera video - PCSX2" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 150 150">
<defs>
<style>
.cls-1 {
fill: none;
stroke: #000;
stroke-miterlimit: 10;
stroke-width: 12px;
}
</style>
</defs>
<g id="By_Maxihplay" data-name="By Maxihplay">
<rect class="cls-1" x="9.4" y="33.49" width="90.95" height="83.02" rx="12.96" ry="12.96"/>
<path class="cls-1" d="m138.43,105.09l-37.81-15.05v-30.07l37.81-15.05c1.03-.41,2.16.3,2.16,1.35v57.48c0,1.05-1.14,1.76-2.16,1.35Z"/>
</g>
</svg>

Before: 782 B → After: 623 B

View File

@@ -1,20 +1,26 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 27.4.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<svg version="1.1" id="PCSX2_-_Namco" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px"
y="0px" viewBox="0 0 150 150" style="enable-background:new 0 0 150 150;" xml:space="preserve">
<path id="By_Maxihplay" d="M130.4,64.3c3.3-4.2,5.2-9.4,5.2-15.1c0-13.5-11-24.4-24.4-24.4c-4.7,0-9.1,1.4-12.9,3.7
c-5.4-8-14.7-12.8-24.4-12.2c-8.5,0.6-16.1,5.3-20.7,12.3c-3.8-2.4-8.3-3.9-13.2-3.9c-13.5,0-24.4,11-24.4,24.4c0,5.2,1.6,10,4.4,14
c-2.6,6.9-8.8,25.5-7.3,37.9c2,15.9,10.9,22.2,11.9,22.8c0.6,0.4,1.3,0.7,2.1,0.8c4.8,0.9,9.5,0.4,13.4-1.5c3.6-1.8,6.4-4.8,8.5-9.1
c3.1-6.5,2.8-15.8,1.6-24.7c0.2,0.1,0.8-0.1,1.4-0.2c4.2,8.6,13.1,14.6,23.4,14.6c10.2,0,19.1-5.9,23.3-14.5c0.5,0.1,0.9,0.2,1,0.1
c-0.9,7-1.2,17.8,2.1,24.7c2.1,4.3,4.8,7.3,8.5,9.1c2.6,1.3,5.7,2,8.9,2c1.5,0,3-0.1,4.5-0.4c0.8-0.2,1.5-0.4,2.1-0.9
c1-0.7,9.9-6.9,11.9-22.8C138.8,89.4,133.1,71.9,130.4,64.3z M111.2,36.8c6.8,0,12.4,5.6,12.4,12.4c0,6.8-5.6,12.4-12.4,12.4
s-12.4-5.6-12.4-12.4C98.8,42.4,104.3,36.8,111.2,36.8z M74.7,28.3c6.3-0.4,12.3,3.3,15,9.3c-1.9,3.5-2.9,7.4-2.9,11.6
c0,2,0.2,3.9,0.7,5.7c-3.7-2-7.9-3.2-12.5-3.2c-4,0-7.8,0.9-11.1,2.5c0.3-1.6,0.5-3.3,0.5-5.1c0-4-1-7.8-2.7-11.1
C63.9,32.6,68.9,28.7,74.7,28.3z M40,36.8c6.8,0,12.4,5.6,12.4,12.4c0,6.8-5.6,12.4-12.4,12.4s-12.4-5.6-12.4-12.4
C27.6,42.4,33.1,36.8,40,36.8z M45.8,78l-1.8,0.5c-3.9,1.1-6.3,4.5-5.7,8.1l0,0.1c0.7,4.8,2.7,17.4-0.2,23.4c-1.5,3.2-3,4-3.5,4.2
c0,0,0,0,0,0c-2.2,1.1-4.8,0.2-6.2-1.7c-1.7-2.5-3.8-6.5-4.5-12.6c-1-7.9,2.3-20.4,5.2-28.9c3.3,1.6,7,2.6,10.9,2.6
c3.5,0,6.7-0.7,9.7-2c-0.5,1.8-0.7,3.7-0.7,5.7C47.8,77.5,46.6,77.8,45.8,78z M75,91.9c-7.7,0-14-6.3-14-14s6.3-14,14-14
c7.7,0,14,6.3,14,14S82.7,91.9,75,91.9z M126.1,100c-0.8,6.1-2.8,10.1-4.5,12.6c-1.4,2-4,2.8-6.2,1.8c0,0,0,0,0,0
c-0.5-0.3-2-1-3.5-4.2c-2.9-6-1.8-19.2-1.2-23.5l0-0.2c0.5-3.2-1.3-6.2-4.4-7.7l-1.1-0.5c-1-0.5-2.6-0.8-4.1-0.9
c-0.1-2.1-0.4-4.2-0.9-6.2c3.3,1.7,7.1,2.7,11.1,2.7c3.5,0,6.9-0.8,9.9-2.1C123.9,80.1,127,92.3,126.1,100z"/>
</svg>
<?xml version="1.0" encoding="UTF-8"?>
<svg id="Jogcon_-_PCSX2" data-name="Jogcon - PCSX2" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 150 150">
<defs>
<style>
.cls-1 {
fill: none;
stroke: #000;
stroke-linecap: round;
stroke-linejoin: round;
stroke-width: 12px;
}
</style>
</defs>
<g id="By_maxihplay" data-name="By @maxihplay">
<g>
<path class="cls-1" d="m126.03,63.22c3.43-3.42,5.55-8.15,5.55-13.38,0-10.44-8.47-18.91-18.91-18.91-6.44,0-12.12,3.22-15.54,8.13"/>
<path class="cls-1" d="m24.65,63.22c-3.43-3.42-4.52-8.15-4.52-13.38,0-10.44,8.47-18.91,18.91-18.91,6.44,0,12.12,3.22,15.54,8.13"/>
<g>
<path d="m130.41,61.42c-1.1-2.72-4.43-4.12-7.43-3.12-3.01.99-4.55,4-3.46,6.72,2.66,6.58,9.26,26.04,7.91,36.9-.78,6.27-2.83,10.41-4.61,12.93-1.43,2.03-4.12,2.88-6.35,1.79-.01,0-.03-.01-.04-.02-.56-.28-2.05-1.02-3.65-4.34-2.98-6.2-1.89-19.72-1.2-24.16l.04-.26c.51-3.24-1.32-6.4-4.54-7.86l-1.15-.53c-1.81-.83-5.55-1.26-7.4-.87.23,1.26,0,3.43,0,4.75,0,3.3-.69,4.83-2.04,7.68,1.01-.44,3.25.21,3.51.11-.92,7.22-1.22,18.32,2.14,25.31,2.13,4.44,4.98,7.5,8.69,9.35,2.71,1.35,5.83,2.04,9.1,2.04,1.52,0,3.06-.15,4.61-.45.78-.15,1.52-.45,2.17-.88,1.04-.68,10.2-7.1,12.23-23.43,1.93-15.54-8.12-40.61-8.55-41.67Z"/>
<path d="m52.83,90.71c-1.19-2.71-2.94-5.14-2.94-8.24,0-1.21.68-2.7.87-3.87-1.34-.25-4.31.36-5.68.74l-1.82.51c-3.97,1.1-6.44,4.6-5.87,8.32l.02.15c.75,4.88,2.74,17.84-.2,23.97-1.6,3.32-3.09,4.06-3.65,4.34-.01,0-.03.01-.04.02-2.23,1.09-4.91.24-6.33-1.79-1.77-2.52-3.85-6.69-4.62-12.93-1.35-10.86,5.26-30.32,7.91-36.9,1.1-2.72-.45-5.73-3.45-6.72-3.01-1-6.33.4-7.43,3.12-.43,1.06-10.47,26.13-8.55,41.67,2.02,16.33,11.19,22.75,12.23,23.43.65.42,1.39.72,2.17.88,4.88.95,9.74.39,13.7-1.59,3.71-1.85,6.56-4.91,8.69-9.35,3.23-6.71,2.84-16.24,1.66-25.32.38.12,2.88-.55,3.33-.43Z"/>
</g>
<circle class="cls-1" cx="75" cy="79.2" r="20.58"/>
<path class="cls-1" d="m55.23,37.19c2.88-8.45,10.44-14.38,19.06-14.98,9.65-.68,18.83,5.45,22.14,14.98"/>
</g>
</g>
</svg>

Before: 2.1 KiB → After: 2.0 KiB

View File

@@ -1,30 +1,20 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 26.5.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<svg version="1.1" id="Duckstation_-_Negcon_00000104687465272175918400000016919813319004934028_"
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px" viewBox="0 0 150 150"
style="enable-background:new 0 0 150 150;" xml:space="preserve">
<style type="text/css">
.st0{fill:none;}
.st1{fill:none;stroke:#000000;stroke-width:12;stroke-miterlimit:10;}
.st2{fill:none;stroke:#000000;stroke-width:12;stroke-linecap:round;stroke-miterlimit:10;}
</style>
<g id="tela">
<rect id="tela_00000129895939108887223950000017508827257489233583_" class="st0" width="150" height="150"/>
</g>
<g id="By_Maxihplay_00000152974811067316581070000007946929605663458202_">
<g id="control">
<g id="B">
<path class="st1" d="M91.9,87.8h14l20.2,21.1c3.6,2.5,8.5-0.2,8.4-4.6l-3.9-35.4c0-10.3-8.3-18.6-18.6-18.6H91.9V87.8L91.9,87.8z
"/>
<line class="st1" x1="83.7" y1="40.1" x2="83.5" y2="99.1"/>
<polyline class="st2" points="101.6,46.2 116.7,46.2 126.1,52 "/>
</g>
<g id="A">
<path class="st1" d="M58.1,87.8h-14l-20.2,21.1c-3.6,2.5-8.5-0.2-8.4-4.6l3.9-35.4c0-10.3,8.3-18.6,18.6-18.6h20.1V87.8
L58.1,87.8z"/>
<line class="st1" x1="66.3" y1="40.1" x2="66.5" y2="99.1"/>
<polyline class="st2" points="48.1,46.2 33,46.2 23.6,52 "/>
</g>
</g>
</g>
</svg>
<?xml version="1.0" encoding="UTF-8"?>
<svg id="Negcon_-_PCSX2" data-name="Negcon - PCSX2" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 150 150">
<defs>
<style>
.cls-1 {
fill: none;
stroke: #000;
stroke-linecap: round;
stroke-miterlimit: 10;
stroke-width: 12px;
}
</style>
</defs>
<g id="By_maxihplay" data-name="By @maxihplay">
<path d="m130.96,116.52c-2.29,0-4.57-.69-6.54-2.06l-.5-.34-19.54-20.41h-17.92v-50.78h26.79c13.8,0,25.04,11.13,25.23,24.88l4.03,36.82c.1,4.4-2.24,8.42-6.11,10.5-1.72.92-3.59,1.38-5.46,1.38Zm-32.49-34.81h11.04l20.83,21.76-3.85-34.98v-.33c0-7.3-5.94-13.23-13.23-13.23h-14.79v26.78Z"/>
<line class="cls-1" x1="84" y1="39.48" x2="83.79" y2="97.31"/>
<path d="m19.04,116.52c-1.86,0-3.74-.46-5.46-1.38-3.87-2.08-6.21-6.1-6.11-10.5l.03-.52,4-36.3c.19-13.75,11.44-24.88,25.23-24.88h26.79v50.78h-17.92l-19.53,20.41-.5.34c-1.97,1.37-4.25,2.06-6.54,2.06Zm17.7-61.59c-7.3,0-13.23,5.94-13.23,13.23l-.04.66-3.82,34.65,20.83-21.76h11.04v-26.78h-14.79Z"/>
<line class="cls-1" x1="66" y1="39.48" x2="66.21" y2="97.31"/>
</g>
</svg>

Before: 1.4 KiB → After: 1.1 KiB

View File

@@ -1,7 +1,15 @@
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Uploaded to: SVG Repo, www.svgrepo.com, Transformed by: SVG Repo Mixer Tools -->
<svg width="256px" height="256px" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
<g id="SVGRepo_bgCarrier" stroke-width="0"/>
<g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"/>
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 27.4.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<svg version="1.1" id="Ball_-_PCSX2" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px"
y="0px" viewBox="0 0 150 150" style="enable-background:new 0 0 150 150;" xml:space="preserve">
<style type="text/css">
.st0{fill:none;stroke:#000000;stroke-width:12;stroke-miterlimit:42.67;}
.st1{stroke:#000000;stroke-width:7;stroke-miterlimit:42.67;}
</style>
<g id="Kam_-_ball">
<circle class="st0" cx="75" cy="75" r="55.7"/>
<circle class="st1" cx="82.1" cy="44.4" r="8.4"/>
<circle class="st1" cx="79.2" cy="77.8" r="8.3"/>
<path class="st1" d="M52.7,52.7c4.6,0,8.4,3.7,8.4,8.4s-3.7,8.3-8.3,8.3s-8.3-3.7-8.3-8.3S48.1,52.7,52.7,52.7z"/>
</g>
</svg>

Before: 891 B → After: 817 B

View File

@@ -1,19 +1,27 @@
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 -10 100 110" >
<g>
<path
fill="none"
stroke="black"
stroke-width="8"
stroke-miterlimit="5"
d="m 0 50
l 20,0
l 7.5,20
l 15,-45
l 15,55
l 15,-65
l 7.5,35
l 20,0" />
<?xml version="1.0" encoding="UTF-8"?>
<svg id="Vibration_-_PCSX2" data-name="Vibration - PCSX2" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 150 150">
<defs>
<style>
.cls-1 {
stroke-linecap: round;
stroke-linejoin: round;
}
.cls-1, .cls-2 {
fill: none;
stroke: #000;
stroke-width: 12px;
}
.cls-2 {
stroke-miterlimit: 5;
}
</style>
</defs>
<g id="By_maxihplay" data-name="By @maxihplay">
<rect class="cls-2" x="51.59" y="20.59" width="46.81" height="87.78" rx="17.19" ry="17.19"/>
<path class="cls-1" d="m75,113.37v7.22c0,4.85,3.97,8.82,8.82,8.82h18.88"/>
<polyline class="cls-1" points="130.09 33.5 116.93 58.56 132.07 75.26 118.88 95.92 130.8 116.5"/>
<polyline class="cls-1" points="19.91 33.5 33.07 58.56 17.93 75.26 31.12 95.92 19.2 116.5"/>
</g>
</svg>
</svg>

Before: 332 B → After: 879 B

View File

@@ -1,7 +1,17 @@
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Uploaded to: SVG Repo, www.svgrepo.com, Transformed by: SVG Repo Mixer Tools -->
<svg width="256px" height="256px" viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg" fill="#ffffff" class="bi bi-camera-video">
<g id="SVGRepo_bgCarrier" stroke-width="0"/>
<g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"/>
<?xml version="1.0" encoding="UTF-8"?>
<svg id="camera_video_-_PCSX2" data-name="camera video - PCSX2" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 150 150">
<defs>
<style>
.cls-1 {
fill: none;
stroke: #fff;
stroke-miterlimit: 10;
stroke-width: 12px;
}
</style>
</defs>
<g id="By_Maxihplay" data-name="By Maxihplay">
<rect class="cls-1" x="9.4" y="33.49" width="90.95" height="83.02" rx="12.96" ry="12.96"/>
<path class="cls-1" d="m138.43,105.09l-37.81-15.05v-30.07l37.81-15.05c1.03-.41,2.16.3,2.16,1.35v57.48c0,1.05-1.14,1.76-2.16,1.35Z"/>
</g>
</svg>

Before: 782 B → After: 623 B

View File

@@ -1,23 +1,30 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 27.4.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<svg version="1.1" id="PCSX2_-_Namco" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px"
y="0px" viewBox="0 0 150 150" style="enable-background:new 0 0 150 150;" xml:space="preserve">
<style type="text/css">
.st0{fill:#FFFFFF;}
</style>
<path id="By_Maxihplay" class="st0" d="M130.4,64.3c3.3-4.2,5.2-9.4,5.2-15.1c0-13.5-11-24.4-24.4-24.4c-4.7,0-9.1,1.4-12.9,3.7
c-5.4-8-14.7-12.8-24.4-12.2c-8.5,0.6-16.1,5.3-20.7,12.3c-3.8-2.4-8.3-3.9-13.2-3.9c-13.5,0-24.4,11-24.4,24.4c0,5.2,1.6,10,4.4,14
c-2.6,6.9-8.8,25.5-7.3,37.9c2,15.9,10.9,22.2,11.9,22.8c0.6,0.4,1.3,0.7,2.1,0.8c4.8,0.9,9.5,0.4,13.4-1.5c3.6-1.8,6.4-4.8,8.5-9.1
c3.1-6.5,2.8-15.8,1.6-24.7c0.2,0.1,0.8-0.1,1.4-0.2c4.2,8.6,13.1,14.6,23.4,14.6c10.2,0,19.1-5.9,23.3-14.5c0.5,0.1,0.9,0.2,1,0.1
c-0.9,7-1.2,17.8,2.1,24.7c2.1,4.3,4.8,7.3,8.5,9.1c2.6,1.3,5.7,2,8.9,2c1.5,0,3-0.1,4.5-0.4c0.8-0.2,1.5-0.4,2.1-0.9
c1-0.7,9.9-6.9,11.9-22.8C138.8,89.4,133.1,71.9,130.4,64.3z M111.2,36.8c6.8,0,12.4,5.6,12.4,12.4c0,6.8-5.6,12.4-12.4,12.4
s-12.4-5.6-12.4-12.4C98.8,42.4,104.3,36.8,111.2,36.8z M74.7,28.3c6.3-0.4,12.3,3.3,15,9.3c-1.9,3.5-2.9,7.4-2.9,11.6
c0,2,0.2,3.9,0.7,5.7c-3.7-2-7.9-3.2-12.5-3.2c-4,0-7.8,0.9-11.1,2.5c0.3-1.6,0.5-3.3,0.5-5.1c0-4-1-7.8-2.7-11.1
C63.9,32.6,68.9,28.7,74.7,28.3z M40,36.8c6.8,0,12.4,5.6,12.4,12.4c0,6.8-5.6,12.4-12.4,12.4s-12.4-5.6-12.4-12.4
C27.6,42.4,33.1,36.8,40,36.8z M45.8,78l-1.8,0.5c-3.9,1.1-6.3,4.5-5.7,8.1l0,0.1c0.7,4.8,2.7,17.4-0.2,23.4c-1.5,3.2-3,4-3.5,4.2
c0,0,0,0,0,0c-2.2,1.1-4.8,0.2-6.2-1.7c-1.7-2.5-3.8-6.5-4.5-12.6c-1-7.9,2.3-20.4,5.2-28.9c3.3,1.6,7,2.6,10.9,2.6
c3.5,0,6.7-0.7,9.7-2c-0.5,1.8-0.7,3.7-0.7,5.7C47.8,77.5,46.6,77.8,45.8,78z M75,91.9c-7.7,0-14-6.3-14-14s6.3-14,14-14
c7.7,0,14,6.3,14,14S82.7,91.9,75,91.9z M126.1,100c-0.8,6.1-2.8,10.1-4.5,12.6c-1.4,2-4,2.8-6.2,1.8c0,0,0,0,0,0
c-0.5-0.3-2-1-3.5-4.2c-2.9-6-1.8-19.2-1.2-23.5l0-0.2c0.5-3.2-1.3-6.2-4.4-7.7l-1.1-0.5c-1-0.5-2.6-0.8-4.1-0.9
c-0.1-2.1-0.4-4.2-0.9-6.2c3.3,1.7,7.1,2.7,11.1,2.7c3.5,0,6.9-0.8,9.9-2.1C123.9,80.1,127,92.3,126.1,100z"/>
</svg>
<?xml version="1.0" encoding="UTF-8"?>
<svg id="Jogcon_-_PCSX2" data-name="Jogcon - PCSX2" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 150 150">
<defs>
<style>
.cls-1 {
fill: #fff;
}
.cls-2 {
fill: none;
stroke: #fff;
stroke-linecap: round;
stroke-linejoin: round;
stroke-width: 12px;
}
</style>
</defs>
<g id="By_maxihplay" data-name="By @maxihplay">
<g>
<path class="cls-2" d="m126.03,63.22c3.43-3.42,5.55-8.15,5.55-13.38,0-10.44-8.47-18.91-18.91-18.91-6.44,0-12.12,3.22-15.54,8.13"/>
<path class="cls-2" d="m24.65,63.22c-3.43-3.42-4.52-8.15-4.52-13.38,0-10.44,8.47-18.91,18.91-18.91,6.44,0,12.12,3.22,15.54,8.13"/>
<g>
<path class="cls-1" d="m130.41,61.42c-1.1-2.72-4.43-4.12-7.43-3.12-3.01.99-4.55,4-3.46,6.72,2.66,6.58,9.26,26.04,7.91,36.9-.78,6.27-2.83,10.41-4.61,12.93-1.43,2.03-4.12,2.88-6.35,1.79-.01,0-.03-.01-.04-.02-.56-.28-2.05-1.02-3.65-4.34-2.98-6.2-1.89-19.72-1.2-24.16l.04-.26c.51-3.24-1.32-6.4-4.54-7.86l-1.15-.53c-1.81-.83-5.55-1.26-7.4-.87.23,1.26,0,3.43,0,4.75,0,3.3-.69,4.83-2.04,7.68,1.01-.44,3.25.21,3.51.11-.92,7.22-1.22,18.32,2.14,25.31,2.13,4.44,4.98,7.5,8.69,9.35,2.71,1.35,5.83,2.04,9.1,2.04,1.52,0,3.06-.15,4.61-.45.78-.15,1.52-.45,2.17-.88,1.04-.68,10.2-7.1,12.23-23.43,1.93-15.54-8.12-40.61-8.55-41.67Z"/>
<path class="cls-1" d="m52.83,90.71c-1.19-2.71-2.94-5.14-2.94-8.24,0-1.21.68-2.7.87-3.87-1.34-.25-4.31.36-5.68.74l-1.82.51c-3.97,1.1-6.44,4.6-5.87,8.32l.02.15c.75,4.88,2.74,17.84-.2,23.97-1.6,3.32-3.09,4.06-3.65,4.34-.01,0-.03.01-.04.02-2.23,1.09-4.91.24-6.33-1.79-1.77-2.52-3.85-6.69-4.62-12.93-1.35-10.86,5.26-30.32,7.91-36.9,1.1-2.72-.45-5.73-3.45-6.72-3.01-1-6.33.4-7.43,3.12-.43,1.06-10.47,26.13-8.55,41.67,2.02,16.33,11.19,22.75,12.23,23.43.65.42,1.39.72,2.17.88,4.88.95,9.74.39,13.7-1.59,3.71-1.85,6.56-4.91,8.69-9.35,3.23-6.71,2.84-16.24,1.66-25.32.38.12,2.88-.55,3.33-.43Z"/>
</g>
<circle class="cls-2" cx="75" cy="79.2" r="20.58"/>
<path class="cls-2" d="m55.23,37.19c2.88-8.45,10.44-14.38,19.06-14.98,9.65-.68,18.83,5.45,22.14,14.98"/>
</g>
</g>
</svg>

Before: 2.1 KiB → After: 2.1 KiB

View File

@@ -1 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?><svg id="Duckstation_-_Negcon" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 150 150"><defs><style>.cls-1{stroke-linecap:round;}.cls-1,.cls-2,.cls-3{fill:none;}.cls-1,.cls-3{stroke:#fff;stroke-miterlimit:10;stroke-width:12px;}</style></defs><g id="tela"><rect id="tela-2" class="cls-2" width="150" height="150"/></g><g id="By_Maxihplay"><g id="control"><g id="B"><path class="cls-3" d="M91.9,87.8h14l20.2,21.1c3.6,2.5,8.5-.2,8.4-4.6l-3.9-35.4c0-10.3-8.3-18.6-18.6-18.6h-20.1v37.5h0Z"/><line class="cls-3" x1="83.7" y1="40.1" x2="83.5" y2="99.1"/><polyline class="cls-1" points="101.6 46.2 116.7 46.2 126.1 52"/></g><g id="A"><path class="cls-3" d="M58.1,87.8h-14l-20.2,21.1c-3.6,2.5-8.5-.2-8.4-4.6l3.9-35.4c0-10.3,8.3-18.6,18.6-18.6h20.1v37.5h0Z"/><line class="cls-3" x1="66.3" y1="40.1" x2="66.5" y2="99.1"/><polyline class="cls-1" points="48.1 46.2 33 46.2 23.6 52"/></g></g></g></svg>
<?xml version="1.0" encoding="UTF-8"?>
<svg id="Negcon_-_PCSX2" data-name="Negcon - PCSX2" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 150 150">
<defs>
<style>
.cls-1 {
fill: #fff;
}
.cls-2 {
fill: none;
stroke: #fff;
stroke-linecap: round;
stroke-miterlimit: 10;
stroke-width: 12px;
}
</style>
</defs>
<g id="By_maxihplay" data-name="By @maxihplay">
<path class="cls-1" d="m130.96,116.52c-2.29,0-4.57-.69-6.54-2.06l-.5-.34-19.54-20.41h-17.92v-50.78h26.79c13.8,0,25.04,11.13,25.23,24.88l4.03,36.82c.1,4.4-2.24,8.42-6.11,10.5-1.72.92-3.59,1.38-5.46,1.38Zm-32.49-34.81h11.04l20.83,21.76-3.85-34.98v-.33c0-7.3-5.94-13.23-13.24-13.23h-14.79v26.78Z"/>
<line class="cls-2" x1="84" y1="39.48" x2="83.79" y2="97.31"/>
<path class="cls-1" d="m19.04,116.52c-1.86,0-3.74-.46-5.46-1.38-3.87-2.08-6.21-6.1-6.11-10.5l.03-.52,4-36.3c.19-13.75,11.44-24.88,25.23-24.88h26.79v50.78h-17.92l-19.54,20.41-.5.34c-1.97,1.37-4.25,2.06-6.54,2.06Zm17.7-61.59c-7.3,0-13.23,5.94-13.23,13.23l-.04.66-3.82,34.65,20.83-21.76h11.04v-26.78h-14.79Z"/>
<line class="cls-2" x1="66" y1="39.48" x2="66.21" y2="97.31"/>
</g>
</svg>

Before: 927 B → After: 1.2 KiB

View File

@@ -1,7 +1,15 @@
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Uploaded to: SVG Repo, www.svgrepo.com, Transformed by: SVG Repo Mixer Tools -->
<svg width="256px" height="256px" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
<g id="SVGRepo_bgCarrier" stroke-width="0"/>
<g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"/>
<?xml version="1.0" encoding="utf-8"?>
<!-- Generator: Adobe Illustrator 27.4.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<svg version="1.1" id="Ball_-_PCSX2" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px"
y="0px" viewBox="0 0 150 150" style="enable-background:new 0 0 150 150;" xml:space="preserve">
<style type="text/css">
.st0{fill:none;stroke:#FFFFFF;stroke-width:12;stroke-miterlimit:42.67;}
.st1{fill:#FFFFFF;stroke:#FFFFFF;stroke-width:7;stroke-miterlimit:42.67;}
</style>
<g id="Kam_-_ball">
<circle class="st0" cx="75" cy="75" r="55.7"/>
<circle class="st1" cx="82.1" cy="44.4" r="8.4"/>
<circle class="st1" cx="79.2" cy="77.8" r="8.3"/>
<path class="st1" d="M52.7,52.7c4.6,0,8.4,3.7,8.4,8.4s-3.7,8.3-8.3,8.3s-8.3-3.7-8.3-8.3S48.1,52.7,52.7,52.7z"/>
</g>
</svg>

Before: 891 B → After: 830 B

View File

@@ -1,19 +1,27 @@
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 -10 100 110" >
<g>
<path
fill="none"
stroke="white"
stroke-width="8"
stroke-miterlimit="5"
d="m 0 50
l 20,0
l 7.5,20
l 15,-45
l 15,55
l 15,-65
l 7.5,35
l 20,0" />
<?xml version="1.0" encoding="UTF-8"?>
<svg id="Vibration_-_PCSX2" data-name="Vibration - PCSX2" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 150 150">
<defs>
<style>
.cls-1 {
stroke-miterlimit: 5;
}
.cls-1, .cls-2 {
fill: none;
stroke: #fff;
stroke-width: 12px;
}
.cls-2 {
stroke-linecap: round;
stroke-linejoin: round;
}
</style>
</defs>
<g id="By_maxihplay" data-name="By @maxihplay">
<rect class="cls-1" x="51.59" y="20.59" width="46.81" height="87.78" rx="17.19" ry="17.19"/>
<path class="cls-2" d="m75,113.37v7.22c0,4.85,3.97,8.82,8.82,8.82h18.88"/>
<polyline class="cls-2" points="130.09 33.5 116.93 58.56 132.07 75.26 118.88 95.92 130.8 116.5"/>
<polyline class="cls-2" points="19.91 33.5 33.07 58.56 17.93 75.26 31.12 95.92 19.2 116.5"/>
</g>
</svg>
</svg>

Before: 332 B → After: 879 B

View File

@@ -1276,6 +1276,7 @@ struct Pcsx2Config
EnableGameFixes : 1, // enables automatic game fixes
SaveStateOnShutdown : 1, // default value for saving state on shutdown
EnableDiscordPresence : 1, // enables discord rich presence integration
UseSavestateSelector: 1,
InhibitScreensaver : 1,
BackupSavestate : 1,
McdFolderAutoManage : 1,

View File

@@ -106,6 +106,11 @@ static bool CanPause()
return true;
}
static bool UseSavestateSelector()
{
return EmuConfig.UseSavestateSelector;
}
BEGIN_HOTKEY_LIST(g_common_hotkeys)
DEFINE_HOTKEY("OpenPauseMenu", TRANSLATE_NOOP("Hotkeys", "System"), TRANSLATE_NOOP("Hotkeys", "Open Pause Menu"),
[](s32 pressed) {
@@ -222,12 +227,12 @@ DEFINE_HOTKEY("InputRecToggleMode", TRANSLATE_NOOP("Hotkeys", "System"),
DEFINE_HOTKEY("PreviousSaveStateSlot", TRANSLATE_NOOP("Hotkeys", "Save States"),
TRANSLATE_NOOP("Hotkeys", "Select Previous Save Slot"), [](s32 pressed) {
if (!pressed && VMManager::HasValidVM())
SaveStateSelectorUI::SelectPreviousSlot(true);
SaveStateSelectorUI::SelectPreviousSlot(UseSavestateSelector());
})
DEFINE_HOTKEY("NextSaveStateSlot", TRANSLATE_NOOP("Hotkeys", "Save States"),
TRANSLATE_NOOP("Hotkeys", "Select Next Save Slot"), [](s32 pressed) {
if (!pressed && VMManager::HasValidVM())
SaveStateSelectorUI::SelectNextSlot(true);
SaveStateSelectorUI::SelectNextSlot(UseSavestateSelector());
})
DEFINE_HOTKEY("SaveStateToSlot", TRANSLATE_NOOP("Hotkeys", "Save States"),
TRANSLATE_NOOP("Hotkeys", "Save State To Selected Slot"), [](s32 pressed) {

View File

@@ -3168,6 +3168,9 @@ void FullscreenUI::DrawInterfaceSettingsPage()
DrawToggleSetting(bsi, FSUI_ICONSTR(ICON_FA_ARCHIVE, "Create Save State Backups"),
FSUI_CSTR("Creates a backup copy of a save state if it already exists when the save is created. The backup copy has a .backup suffix"),
"EmuCore", "BackupSavestate", true);
DrawToggleSetting(bsi, FSUI_ICONSTR(ICON_FA_INFO_CIRCLE, "Use Save State Selector"),
FSUI_CSTR("Show a save state selector UI when switching slots instead of showing a notification bubble."),
"EmuCore", "UseSavestateSelector", true);
if (DrawToggleSetting(bsi, FSUI_ICONSTR(ICON_FA_LIGHTBULB, "Use Light Theme"),
FSUI_CSTR("Uses a light coloured theme instead of the default dark theme."), "UI", "UseLightFullscreenUITheme", false))
{
@@ -6221,13 +6224,23 @@ void FullscreenUI::DrawGameGrid(const ImVec2& heading_size)
SmallString draw_title;
u32 grid_x = 0;
ImGui::SetCursorPos(ImVec2(start_x, 0.0f));
for (const GameList::Entry* entry : s_game_list_sorted_entries)
{
ImGuiWindow* window = ImGui::GetCurrentWindow();
if (window->SkipItems)
continue;
if (grid_x == grid_count_x)
{
grid_x = 0;
ImGui::SetCursorPosX(start_x);
ImGui::SetCursorPosY(ImGui::GetCursorPosY() + item_spacing);
}
else
{
ImGui::SameLine(start_x + static_cast<float>(grid_x) * (item_width + item_spacing));
}
const ImGuiID id = window->GetID(entry->path.c_str(), entry->path.c_str() + entry->path.length());
const ImVec2 pos(window->DC.CursorPos);
ImRect bb(pos, pos + item_size);
@@ -6280,16 +6293,6 @@ void FullscreenUI::DrawGameGrid(const ImVec2& heading_size)
}
grid_x++;
if (grid_x == grid_count_x)
{
grid_x = 0;
ImGui::SetCursorPosX(start_x);
ImGui::SetCursorPosY(ImGui::GetCursorPosY() + item_spacing);
}
else
{
ImGui::SameLine(start_x + static_cast<float>(grid_x) * (item_width + item_spacing));
}
}
EndMenuButtons();
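
A minimal, self-contained sketch of the layout pattern used in the reworked loop above: the row-wrap / same-line adjustment now happens right before an entry is placed, so the cursor is only moved when another item is actually drawn, and entries are skipped early when the window has SkipItems set. The names and constants below are illustrative, not PCSX2 or Dear ImGui code.

#include <cstdio>

int main()
{
    const int grid_count_x = 3;                  // columns per row (illustrative)
    const float start_x = 20.0f, item_width = 100.0f, item_spacing = 10.0f;

    float y = 0.0f;
    int grid_x = 0;
    for (int i = 0; i < 8; ++i)
    {
        if (grid_x == grid_count_x)              // wrap before emitting the item, as above
        {
            grid_x = 0;
            y += item_spacing;                   // the real code also advances by the item height
        }
        const float x = start_x + static_cast<float>(grid_x) * (item_width + item_spacing);
        std::printf("item %d at (%.0f, %.0f)\n", i, x, y);
        grid_x++;
    }
    return 0;
}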

View File

@@ -1928,6 +1928,7 @@ void Pcsx2Config::LoadSaveCore(SettingsWrapper& wrap)
SettingsWrapBitBool(EnableRecordingTools);
SettingsWrapBitBool(EnableGameFixes);
SettingsWrapBitBool(SaveStateOnShutdown);
SettingsWrapBitBool(UseSavestateSelector);
SettingsWrapBitBool(EnableDiscordPresence);
SettingsWrapBitBool(InhibitScreensaver);
SettingsWrapBitBool(HostFs);