Merge branch 'master' of https://github.com/finalburnneo/FBNeo into finalburnneo-master

2024-11-23 00:49:57 +00:00 · 2024-11-20 09:47:37 +01:00 · 2024-11-20 09:47:37 +01:00 · a4a60556b9
commit a4a60556b9
parent 785d5d664f 8f20d8e022
10 changed files with 1225 additions and 1753 deletions
--- a/makefile.burner_win32_rules
+++ b/makefile.burner_win32_rules
@ -34,7 +34,7 @@ ifdef INCLUDE_7Z_SUPPORT
 depobj	+=	un7z.o \
 			\
 			7zArcIn.o 7zBuf.o 7zBuf2.o 7zCrc.o 7zCrcOpt.o 7zDec.o 7zFile.o 7zStream.o Alloc.o Bcj2.o Bra.o Bra86.o BraIA64.o CpuArch.o \
-			Delta.o LzFindOpt.o LzmaDec.o Lzma2Dec.o MtDec.o Ppmd7.o Ppmd7Dec.o Ppmd7aDec.o Sha256.o Sha1Opt.o Sha256Opt.o SwapBytes.o Threads.o Xxh64.o Xz.o XzCrc64.o XzCrc64Opt.o XzDec.o ZstdDec.o
+			Delta.o LzFindOpt.o LzmaDec.o Lzma2Dec.o MtDec.o Ppmd7.o Ppmd7Dec.o Ppmd7aDec.o Sha256.o Sha1Opt.o Sha256Opt.o Threads.o Xxh64.o Xz.o XzCrc64.o XzCrc64Opt.o XzDec.o ZstdDec.o
 endif

 ifdef INCLUDE_AVI_RECORDING
--- a/makefile.sdl
+++ b/makefile.sdl
@ -75,7 +75,7 @@ ifdef INCLUDE_7Z_SUPPORT
 depobj	+=	un7z.o \
 			\
 			7zArcIn.o 7zBuf.o 7zBuf2.o 7zCrc.o 7zCrcOpt.o 7zDec.o 7zFile.o 7zStream.o Alloc.o Bcj2.o Bra.o Bra86.o BraIA64.o CpuArch.o \
-			Delta.o LzFindOpt.o LzmaDec.o Lzma2Dec.o MtDec.o Ppmd7.o Ppmd7Dec.o Ppmd7aDec.o Sha256.o Sha1Opt.o Sha256Opt.o SwapBytes.o Threads.o Xxh64.o Xz.o XzCrc64.o XzCrc64Opt.o XzDec.o ZstdDec.o
+			Delta.o LzFindOpt.o LzmaDec.o Lzma2Dec.o MtDec.o Ppmd7.o Ppmd7Dec.o Ppmd7aDec.o Sha256.o Sha1Opt.o Sha256Opt.o Threads.o Xxh64.o Xz.o XzCrc64.o XzCrc64Opt.o XzDec.o ZstdDec.o
 endif

 autobj += $(depobj)
--- a/makefile.sdl2
+++ b/makefile.sdl2
@ -80,7 +80,7 @@ ifdef INCLUDE_7Z_SUPPORT
 depobj	+=	un7z.o \
 			\
 			7zArcIn.o 7zBuf.o 7zBuf2.o 7zCrc.o 7zCrcOpt.o 7zDec.o 7zFile.o 7zStream.o Alloc.o Bcj2.o Bra.o Bra86.o BraIA64.o CpuArch.o \
-			Delta.o LzFindOpt.o LzmaDec.o Lzma2Dec.o MtDec.o Ppmd7.o Ppmd7Dec.o Ppmd7aDec.o Sha256.o Sha1Opt.o Sha256Opt.o SwapBytes.o Threads.o Xxh64.o Xz.o XzCrc64.o XzCrc64Opt.o XzDec.o ZstdDec.o
+			Delta.o LzFindOpt.o LzmaDec.o Lzma2Dec.o MtDec.o Ppmd7.o Ppmd7Dec.o Ppmd7aDec.o Sha256.o Sha1Opt.o Sha256Opt.o Threads.o Xxh64.o Xz.o XzCrc64.o XzCrc64Opt.o XzDec.o ZstdDec.o
 endif

 autobj += $(depobj)
--- a/src/burn/drv/megadrive/d_megadrive.cpp
+++ b/src/burn/drv/megadrive/d_megadrive.cpp
@ -47834,7 +47834,7 @@ struct BurnDriver BurnDrvmd_btomatog = {
 // https://romhackplaza.org/romhacks/bishoujo-super-street-fighter-ii-glamor-queen-genesis/

 static struct BurnRomInfo md_bssf2gqRomDesc[] = {
-	{ "Bishoujo Super Street Fighter II - Glamor Queen v1.6 (2024)(Yoni Arousement).bin", 0x500000, 0x87a6c750, BRF_PRG | SEGA_MD_ROM_LOAD16_WORD_SWAP | SEGA_MD_ROM_OFFS_000000  },
+	{ "Bishoujo Super Street Fighter II - Glamor Queen v1.6 (2024)(Yoni Arousement).bin", 5242880, 0xf8aab4d4, BRF_PRG | SEGA_MD_ROM_LOAD16_WORD_SWAP | SEGA_MD_ROM_OFFS_000000  },
 };

 STD_ROM_PICK(md_bssf2gq)
--- a/src/burn/drv/megadrive/megadrive.cpp
+++ b/src/burn/drv/megadrive/megadrive.cpp
--- a/src/burn/drv/snes/d_snes.cpp
+++ b/src/burn/drv/snes/d_snes.cpp
@ -29463,6 +29463,44 @@ struct BurnDriver BurnDrvsnes_Ys4te = {
 	512, 448, 4, 3
 };

+// Yuu Yuu Hakusho
+
+static struct BurnRomInfo snes_YuuyuuhakushoRomDesc[] = {
+	{ "Yuu Yuu Hakusho (1993)(Namcot).sfc", 2097152, 0xec96d517, BRF_ESS | BRF_PRG },
+};
+
+STD_ROM_PICK(snes_Yuuyuuhakusho)
+STD_ROM_FN(snes_Yuuyuuhakusho)
+
+struct BurnDriver BurnDrvsnes_Yuuyuuhakusho = {
+	"snes_yuuyuuhakusho", NULL, NULL, NULL, "1993",
+	"Yuu Yuu Hakusho (Japan)\0", NULL, "Namcot", "Nintendo",
+	NULL, NULL, NULL, NULL,
+	BDF_GAME_WORKING, 2, HARDWARE_SNES, GBF_VSFIGHT, 0,
+	SNESGetZipName, snes_YuuyuuhakushoRomInfo, snes_YuuyuuhakushoRomName, NULL, NULL, NULL, NULL, SNESInputInfo, SNESDIPInfo,
+	DrvInit, DrvExit, DrvFrame, DrvDraw, DrvScan, &DrvRecalc, 0x8000,
+	512, 448, 4, 3
+};
+
+// Yuu Yuu Hakusho Final - Makai Saikyou Retsuden
+
+static struct BurnRomInfo snes_YuuyuuhakushofinalRomDesc[] = {
+	{ "Yuu Yuu Hakusho Final - Makai Saikyou Retsuden (1995)(Namcot).sfc", 3145728, 0x5617a42e, BRF_ESS | BRF_PRG },
+};
+
+STD_ROM_PICK(snes_Yuuyuuhakushofinal)
+STD_ROM_FN(snes_Yuuyuuhakushofinal)
+
+struct BurnDriver BurnDrvsnes_Yuuyuuhakushofinal = {
+	"snes_yuuyuuhakushofinal", NULL, NULL, NULL, "1995",
+	"Yuu Yuu Hakusho Final - Makai Saikyou Retsuden (Japan)\0", NULL, "Namcot", "Nintendo",
+	NULL, NULL, NULL, NULL,
+	BDF_GAME_WORKING, 2, HARDWARE_SNES, GBF_VSFIGHT, 0,
+	SNESGetZipName, snes_YuuyuuhakushofinalRomInfo, snes_YuuyuuhakushofinalRomName, NULL, NULL, NULL, NULL, SNESInputInfo, SNESDIPInfo,
+	DrvInit, DrvExit, DrvFrame, DrvDraw, DrvScan, &DrvRecalc, 0x8000,
+	512, 480, 4, 3
+};
+
 // Yuu Yuu Hakusho 2 - Kakutou no Shou (Japan)

 static struct BurnRomInfo snes_Yuuyuuhak2jRomDesc[] = {
@ -30154,10 +30192,10 @@ struct BurnDriver BurnDrvsnes_Ff6rmd = {
 	512, 448, 4, 3
 };

-// Final Fantasy VI: A Soldier's Contingency (Hack, v2.01)
+// Final Fantasy VI: A Soldier's Contingency (Hack, v2.02)
 // https://romhackplaza.org/romhacks/final-fantasy-vi-a-soldiers-contingency-extended-bestiary-edition-snes/
 static struct BurnRomInfo snes_Ff6scRomDesc[] = {
-	{ "Final Fantasy VI - A Soldier's Contingency (Extended Bestiary Edition) v2.01 (2024)(Lightning Hunter).sfc", 6291456, 0xfd1a4f8b, BRF_ESS | BRF_PRG },
+	{ "Final Fantasy VI - A Soldier's Contingency (Extended Bestiary Edition) v2.02 (2024)(Lightning Hunter).sfc", 6291456, 0x7d309e30, BRF_ESS | BRF_PRG },
 };

 STD_ROM_PICK(snes_Ff6sc)
@ -30165,7 +30203,7 @@ STD_ROM_FN(snes_Ff6sc)

 struct BurnDriver BurnDrvsnes_Ff6sc = {
 	"snes_ff6sc", "snes_finalfantiii", NULL, NULL, "2024",
-	"Final Fantasy VI: A Soldier's Contingency (Hack, v2.01)\0", "(Extended Bestiary Edition)", "Lightning Hunter", "Nintendo",
+	"Final Fantasy VI: A Soldier's Contingency (Hack, v2.02)\0", "(Extended Bestiary Edition)", "Lightning Hunter", "Nintendo",
 	NULL, NULL, NULL, NULL,
 	BDF_GAME_WORKING | BDF_CLONE | BDF_HACK, 1, HARDWARE_SNES, GBF_RPG, 0,
 	SNESGetZipName, snes_Ff6scRomInfo, snes_Ff6scRomName, NULL, NULL, NULL, NULL, SNESInputInfo, SNESDIPInfo,
@ -31770,3 +31808,21 @@ struct BurnDriver BurnDrvsnes_Zombies2h = {
 	512, 448, 4, 3
 };

+// Quinty
+
+static struct BurnRomInfo snes_QuintyRomDesc[] = {
+	{ "Quinty (1999)(Game Freak).sfc", 2097152, 0x1225927b, BRF_ESS | BRF_PRG },
+};
+
+STD_ROM_PICK(snes_Quinty)
+STD_ROM_FN(snes_Quinty)
+
+struct BurnDriver BurnDrvsnes_Quinty = {
+	"snes_quinty", NULL, NULL, NULL, "1999",
+	"Quinty (Japan, Prototype)\0", NULL, "Game Freak", "Nintendo",
+	NULL, NULL, NULL, NULL,
+	BDF_GAME_WORKING | BDF_PROTOTYPE, 1, HARDWARE_SNES, GBF_MISC, 0,
+	SNESGetZipName, snes_QuintyRomInfo, snes_QuintyRomName, NULL, NULL, NULL, NULL, SNESInputInfo, SNESDIPInfo,
+	DrvInit, DrvExit, DrvFrame, DrvDraw, DrvScan, &DrvRecalc, 0x8000,
+	512, 448, 4, 3
+};
--- a/src/burn/drv/snes/dsp.cpp
+++ b/src/burn/drv/snes/dsp.cpp
@ -135,6 +135,7 @@ void dsp_reset(Dsp* dsp) {
  memset(dsp->firBufferR, 0, sizeof(dsp->firBufferR));
  memset(dsp->sampleBuffer, 0, sizeof(dsp->sampleBuffer));
  dsp->sampleOffset = 0;
+  dsp->sampleCount = 0;
  dsp->lastFrameBoundary = 0;
 }

@ -186,6 +187,7 @@ void dsp_handleState(Dsp* dsp, StateHandler* sh) {
  }
  sh_handleByteArray(sh, dsp->ram, 0x80);
 //  sh_handleByteArray(sh, (UINT8*)&dsp->sampleBuffer[0], 0x800*2*2);
+//  sh_handleInts(sh, dsp->sampleCount, NULL);
 }

 void dsp_cycle(Dsp* dsp) {
@ -207,8 +209,9 @@ void dsp_cycle(Dsp* dsp) {
  }
  if (bBurnRunAheadFrame == 0) {
    // put final sample in the samplebuffer
-    dsp->sampleBuffer[(dsp->sampleOffset & 0x7ff) * 2] = dsp->sampleOutL;
+    dsp->sampleBuffer[(dsp->sampleOffset   & 0x7ff) * 2 + 0] = dsp->sampleOutL;
    dsp->sampleBuffer[(dsp->sampleOffset++ & 0x7ff) * 2 + 1] = dsp->sampleOutR;
+	dsp->sampleCount++;
  }
 }
 static int clamp16(int val) {
@ -590,11 +593,12 @@ void dsp_write(Dsp* dsp, uint8_t adr, uint8_t val) {

 void dsp_getSamples(Dsp* dsp, int16_t* sampleData, int samplesPerFrame) {
  // resample from 534 / 641 samples per frame to wanted value
-  float wantedSamples = (dsp->apu->snes->palTiming ? 641.0 : 534.0);
+  float wantedSamples = dsp->sampleCount; // (dsp->apu->snes->palTiming ? 641.0 : 534.0);
+  dsp->sampleCount = 0;
  double adder = wantedSamples / samplesPerFrame;
  double location = dsp->lastFrameBoundary - wantedSamples;
  for(int i = 0; i < samplesPerFrame; i++) {
-    sampleData[i * 2] = dsp->sampleBuffer[(((int) location) & 0x7ff) * 2];
+    sampleData[i * 2 + 0] = dsp->sampleBuffer[(((int) location) & 0x7ff) * 2 + 0];
    sampleData[i * 2 + 1] = dsp->sampleBuffer[(((int) location) & 0x7ff) * 2 + 1];
    location += adder;
  }
--- a/src/burn/drv/snes/dsp.h
+++ b/src/burn/drv/snes/dsp.h
@ -83,6 +83,7 @@ struct Dsp {
  // sample ring buffer (2048 samples, *2 for stereo)
  int16_t sampleBuffer[0x800 * 2];
  uint16_t sampleOffset; // current offset in samplebuffer
+  uint32_t sampleCount; // samples generated since last render
  uint16_t lastFrameBoundary;
 };

--- a/src/dep/libs/lib7z/SwapBytes.c
+++ b/src/dep/libs/lib7z/SwapBytes.c
@ -1,835 +0,0 @@
-/* SwapBytes.c -- Byte Swap conversion filter
-2024-03-01 : Igor Pavlov : Public domain */
-
-#include "Precomp.h"
-
-#include "Compiler.h"
-#include "CpuArch.h"
-#include "RotateDefs.h"
-#include "SwapBytes.h"
-
-typedef UInt16 CSwapUInt16;
-typedef UInt32 CSwapUInt32;
-
-// #define k_SwapBytes_Mode_BASE   0
-
-#ifdef MY_CPU_X86_OR_AMD64
-
-#define k_SwapBytes_Mode_SSE2   1
-#define k_SwapBytes_Mode_SSSE3  2
-#define k_SwapBytes_Mode_AVX2   3
-
-  // #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
-  #if defined(__clang__) && (__clang_major__ >= 4) \
-      || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701)
-      #define k_SwapBytes_Mode_MAX  k_SwapBytes_Mode_AVX2
-      #define SWAP_ATTRIB_SSE2  __attribute__((__target__("sse2")))
-      #define SWAP_ATTRIB_SSSE3 __attribute__((__target__("ssse3")))
-      #define SWAP_ATTRIB_AVX2  __attribute__((__target__("avx2")))
-  #elif defined(_MSC_VER)
-    #if (_MSC_VER == 1900)
-      #pragma warning(disable : 4752) // found Intel(R) Advanced Vector Extensions; consider using /arch:AVX
-    #endif
-    #if (_MSC_VER >= 1900)
-      #define k_SwapBytes_Mode_MAX  k_SwapBytes_Mode_AVX2
-    #elif (_MSC_VER >= 1500)  // (VS2008)
-      #define k_SwapBytes_Mode_MAX  k_SwapBytes_Mode_SSSE3
-    #elif (_MSC_VER >= 1310)  // (VS2003)
-      #define k_SwapBytes_Mode_MAX  k_SwapBytes_Mode_SSE2
-    #endif
-  #endif // _MSC_VER
-
-/*
-// for debug
-#ifdef k_SwapBytes_Mode_MAX
-#undef k_SwapBytes_Mode_MAX
-#endif
-*/
-
-#ifndef k_SwapBytes_Mode_MAX
-#define k_SwapBytes_Mode_MAX 0
-#endif
-
-#if (k_SwapBytes_Mode_MAX != 0) && defined(MY_CPU_AMD64)
-  #define k_SwapBytes_Mode_MIN  k_SwapBytes_Mode_SSE2
-#else
-  #define k_SwapBytes_Mode_MIN  0
-#endif
-
-#if (k_SwapBytes_Mode_MAX >= k_SwapBytes_Mode_AVX2)
-  #define USE_SWAP_AVX2
-#endif
-#if (k_SwapBytes_Mode_MAX >= k_SwapBytes_Mode_SSSE3)
-  #define USE_SWAP_SSSE3
-#endif
-#if (k_SwapBytes_Mode_MAX >= k_SwapBytes_Mode_SSE2)
-  #define USE_SWAP_128
-#endif
-
-#if k_SwapBytes_Mode_MAX <= k_SwapBytes_Mode_MIN || !defined(USE_SWAP_128)
-#define FORCE_SWAP_MODE
-#endif
-
-
-#ifdef USE_SWAP_128
-/*
- <mmintrin.h> MMX
-<xmmintrin.h> SSE
-<emmintrin.h> SSE2
-<pmmintrin.h> SSE3
-<tmmintrin.h> SSSE3
-<smmintrin.h> SSE4.1
-<nmmintrin.h> SSE4.2
-<ammintrin.h> SSE4A
-<wmmintrin.h> AES
-<immintrin.h> AVX, AVX2, FMA
-*/
-
-#include <emmintrin.h> // sse2
-// typedef __m128i v128;
-
-#define SWAP2_128(i) { \
-  const __m128i v = *(const __m128i *)(const void *)(items + (i) * 8); \
-                    *(      __m128i *)(      void *)(items + (i) * 8) = \
-    _mm_or_si128( \
-      _mm_slli_epi16(v, 8), \
-      _mm_srli_epi16(v, 8)); }
-// _mm_or_si128() has more ports to execute than _mm_add_epi16().
-
-static
-#ifdef SWAP_ATTRIB_SSE2
-SWAP_ATTRIB_SSE2
-#endif
-void
-Z7_FASTCALL
-SwapBytes2_128(CSwapUInt16 *items, const CSwapUInt16 *lim)
-{
-  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
-  do
-  {
-    SWAP2_128(0)  SWAP2_128(1)  items += 2 * 8;
-    SWAP2_128(0)  SWAP2_128(1)  items += 2 * 8;
-  }
-  while (items != lim);
-}
-
-/*
-// sse2
-#define SWAP4_128_pack(i) { \
-  __m128i v = *(const __m128i *)(const void *)(items + (i) * 4); \
-  __m128i v0 = _mm_unpacklo_epi8(v, mask); \
-  __m128i v1 = _mm_unpackhi_epi8(v, mask); \
-  v0 = _mm_shufflelo_epi16(v0, 0x1b); \
-  v1 = _mm_shufflelo_epi16(v1, 0x1b); \
-  v0 = _mm_shufflehi_epi16(v0, 0x1b); \
-  v1 = _mm_shufflehi_epi16(v1, 0x1b); \
-  *(__m128i *)(void *)(items + (i) * 4) = _mm_packus_epi16(v0, v1); }
-
-static
-#ifdef SWAP_ATTRIB_SSE2
-SWAP_ATTRIB_SSE2
-#endif
-void
-Z7_FASTCALL
-SwapBytes4_128_pack(CSwapUInt32 *items, const CSwapUInt32 *lim)
-{
-  const __m128i mask = _mm_setzero_si128();
-  // const __m128i mask = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
-  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
-  do
-  {
-    SWAP4_128_pack(0); items += 1 * 4;
-    // SWAP4_128_pack(0); SWAP4_128_pack(1); items += 2 * 4;
-  }
-  while (items != lim);
-}
-
-// sse2
-#define SWAP4_128_shift(i) { \
-  __m128i v = *(const __m128i *)(const void *)(items + (i) * 4); \
-  __m128i v2; \
-  v2 = _mm_or_si128( \
-        _mm_slli_si128(_mm_and_si128(v, mask), 1), \
-        _mm_and_si128(_mm_srli_si128(v, 1), mask)); \
-  v = _mm_or_si128( \
-        _mm_slli_epi32(v, 24), \
-        _mm_srli_epi32(v, 24)); \
-  *(__m128i *)(void *)(items + (i) * 4) = _mm_or_si128(v2, v); }
-
-static
-#ifdef SWAP_ATTRIB_SSE2
-SWAP_ATTRIB_SSE2
-#endif
-void
-Z7_FASTCALL
-SwapBytes4_128_shift(CSwapUInt32 *items, const CSwapUInt32 *lim)
-{
-  #define M1 0xff00
-  const __m128i mask = _mm_set_epi32(M1, M1, M1, M1);
-  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
-  do
-  {
-    // SWAP4_128_shift(0)  SWAP4_128_shift(1)  items += 2 * 4;
-    // SWAP4_128_shift(0)  SWAP4_128_shift(1)  items += 2 * 4;
-    SWAP4_128_shift(0); items += 1 * 4;
-  }
-  while (items != lim);
-}
-*/
-
-
-#if defined(USE_SWAP_SSSE3) || defined(USE_SWAP_AVX2)
-
-#define SWAP_SHUF_REV_SEQ_2_VALS(v)                (v)+1, (v)
-#define SWAP_SHUF_REV_SEQ_4_VALS(v)  (v)+3, (v)+2, (v)+1, (v)
-
-#define SWAP2_SHUF_MASK_16_BYTES \
-    SWAP_SHUF_REV_SEQ_2_VALS (0 * 2), \
-    SWAP_SHUF_REV_SEQ_2_VALS (1 * 2), \
-    SWAP_SHUF_REV_SEQ_2_VALS (2 * 2), \
-    SWAP_SHUF_REV_SEQ_2_VALS (3 * 2), \
-    SWAP_SHUF_REV_SEQ_2_VALS (4 * 2), \
-    SWAP_SHUF_REV_SEQ_2_VALS (5 * 2), \
-    SWAP_SHUF_REV_SEQ_2_VALS (6 * 2), \
-    SWAP_SHUF_REV_SEQ_2_VALS (7 * 2)
-
-#define SWAP4_SHUF_MASK_16_BYTES \
-    SWAP_SHUF_REV_SEQ_4_VALS (0 * 4), \
-    SWAP_SHUF_REV_SEQ_4_VALS (1 * 4), \
-    SWAP_SHUF_REV_SEQ_4_VALS (2 * 4), \
-    SWAP_SHUF_REV_SEQ_4_VALS (3 * 4)
-
-#if defined(USE_SWAP_AVX2)
-/* if we use 256_BIT_INIT_MASK, each static array mask will be larger for 16 bytes */
-// #define SWAP_USE_256_BIT_INIT_MASK
-#endif
-
-#if defined(SWAP_USE_256_BIT_INIT_MASK) && defined(USE_SWAP_AVX2)
-#define SWAP_MASK_INIT_SIZE 32
-#else
-#define SWAP_MASK_INIT_SIZE 16
-#endif
-
-MY_ALIGN(SWAP_MASK_INIT_SIZE)
-static const Byte k_ShufMask_Swap2[] =
-{
-    SWAP2_SHUF_MASK_16_BYTES
-  #if SWAP_MASK_INIT_SIZE > 16
-  , SWAP2_SHUF_MASK_16_BYTES
-  #endif
-};
-
-MY_ALIGN(SWAP_MASK_INIT_SIZE)
-static const Byte k_ShufMask_Swap4[] =
-{
-    SWAP4_SHUF_MASK_16_BYTES
-  #if SWAP_MASK_INIT_SIZE > 16
-  , SWAP4_SHUF_MASK_16_BYTES
-  #endif
-};
-
-
-#ifdef USE_SWAP_SSSE3
-
-#include <tmmintrin.h> // ssse3
-
-#define SHUF_128(i)   *(items + (i)) = \
-     _mm_shuffle_epi8(*(items + (i)), mask); // SSSE3
-
-// Z7_NO_INLINE
-static
-#ifdef SWAP_ATTRIB_SSSE3
-SWAP_ATTRIB_SSSE3
-#endif
-Z7_ATTRIB_NO_VECTORIZE
-void
-Z7_FASTCALL
-ShufBytes_128(void *items8, const void *lim8, const void *mask128_ptr)
-{
-  __m128i *items = (__m128i *)items8;
-  const __m128i *lim = (const __m128i *)lim8;
-  // const __m128i mask = _mm_set_epi8(SHUF_SWAP2_MASK_16_VALS);
-  // const __m128i mask = _mm_set_epi8(SHUF_SWAP4_MASK_16_VALS);
-  // const __m128i mask = _mm_load_si128((const __m128i *)(const void *)&(k_ShufMask_Swap4[0]));
-  // const __m128i mask = _mm_load_si128((const __m128i *)(const void *)&(k_ShufMask_Swap4[0]));
-  // const __m128i mask = *(const __m128i *)(const void *)&(k_ShufMask_Swap4[0]);
-  const __m128i mask = *(const __m128i *)mask128_ptr;
-  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
-  do
-  {
-    SHUF_128(0)  SHUF_128(1)  items += 2;
-    SHUF_128(0)  SHUF_128(1)  items += 2;
-  }
-  while (items != lim);
-}
-
-#endif // USE_SWAP_SSSE3
-
-
-
-#ifdef USE_SWAP_AVX2
-
-#include <immintrin.h> // avx, avx2
-#if defined(__clang__)
-#include <avxintrin.h>
-#include <avx2intrin.h>
-#endif
-
-#define SHUF_256(i)   *(items + (i)) = \
-  _mm256_shuffle_epi8(*(items + (i)), mask); // AVX2
-
-// Z7_NO_INLINE
-static
-#ifdef SWAP_ATTRIB_AVX2
-SWAP_ATTRIB_AVX2
-#endif
-Z7_ATTRIB_NO_VECTORIZE
-void
-Z7_FASTCALL
-ShufBytes_256(void *items8, const void *lim8, const void *mask128_ptr)
-{
-  __m256i *items = (__m256i *)items8;
-  const __m256i *lim = (const __m256i *)lim8;
-  /*
-  UNUSED_VAR(mask128_ptr)
-  __m256i mask =
-  for Swap4: _mm256_setr_epi8(SWAP4_SHUF_MASK_16_BYTES, SWAP4_SHUF_MASK_16_BYTES);
-  for Swap2: _mm256_setr_epi8(SWAP2_SHUF_MASK_16_BYTES, SWAP2_SHUF_MASK_16_BYTES);
-  */
-  const __m256i mask =
- #if SWAP_MASK_INIT_SIZE > 16
-      *(const __m256i *)(const void *)mask128_ptr;
- #else
-  /* msvc: broadcastsi128() version reserves the stack for no reason
-     msvc 19.29-: _mm256_insertf128_si256() / _mm256_set_m128i)) versions use non-avx movdqu   xmm0,XMMWORD PTR [r8]
-     msvc 19.30+ (VS2022): replaces _mm256_set_m128i(m,m) to vbroadcastf128(m) as we want
-  */
-  // _mm256_broadcastsi128_si256(*mask128_ptr);
-#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION < 80000)
-  #define MY_mm256_set_m128i(hi, lo)  _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1)
-#else
-  #define MY_mm256_set_m128i  _mm256_set_m128i
-#endif
-      MY_mm256_set_m128i(
-        *(const __m128i *)mask128_ptr,
-        *(const __m128i *)mask128_ptr);
- #endif
-  
-  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
-  do
-  {
-    SHUF_256(0)  SHUF_256(1)  items += 2;
-    SHUF_256(0)  SHUF_256(1)  items += 2;
-  }
-  while (items != lim);
-}
-
-#endif // USE_SWAP_AVX2
-#endif // USE_SWAP_SSSE3 || USE_SWAP_AVX2
-#endif // USE_SWAP_128
-
-
-
-// compile message "NEON intrinsics not available with the soft-float ABI"
-#elif defined(MY_CPU_ARM_OR_ARM64) \
-    && defined(MY_CPU_LE) \
-    && !defined(Z7_DISABLE_ARM_NEON)
-
-  #if defined(__clang__) && (__clang_major__ >= 8) \
-    || defined(__GNUC__) && (__GNUC__ >= 6)
-    #if defined(__ARM_FP)
-    #if (defined(__ARM_ARCH) && (__ARM_ARCH >= 4)) \
-        || defined(MY_CPU_ARM64)
-    #if  defined(MY_CPU_ARM64) \
-      || !defined(Z7_CLANG_VERSION) \
-      || defined(__ARM_NEON)
-      #define USE_SWAP_128
-    #ifdef MY_CPU_ARM64
-      // #define SWAP_ATTRIB_NEON __attribute__((__target__("")))
-    #else
-#if defined(Z7_CLANG_VERSION)
-      // #define SWAP_ATTRIB_NEON __attribute__((__target__("neon")))
-#else
-      // #pragma message("SWAP_ATTRIB_NEON __attribute__((__target__(fpu=neon))")
-      #define SWAP_ATTRIB_NEON __attribute__((__target__("fpu=neon")))
-#endif
-    #endif // MY_CPU_ARM64
-    #endif // __ARM_NEON
-    #endif // __ARM_ARCH
-    #endif // __ARM_FP
-
-  #elif defined(_MSC_VER)
-    #if (_MSC_VER >= 1910)
-      #define USE_SWAP_128
-    #endif
-  #endif
-
-  #ifdef USE_SWAP_128
-  #if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
-    #include <arm64_neon.h>
-  #else
-
-/*
-#if !defined(__ARM_NEON)
-#if defined(Z7_GCC_VERSION) && (__GNUC__  <   5) \
- || defined(Z7_GCC_VERSION) && (__GNUC__ ==   5) && (Z7_GCC_VERSION <  90201) \
- || defined(Z7_GCC_VERSION) && (__GNUC__ ==   5) && (Z7_GCC_VERSION < 100100)
-Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
-#pragma message("#define __ARM_NEON 1")
-// #define __ARM_NEON 1
-Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
-#endif
-#endif
-*/
-    #include <arm_neon.h>
-  #endif
-  #endif
-
-#ifndef USE_SWAP_128
-  #define FORCE_SWAP_MODE
-#else
- 
-#ifdef MY_CPU_ARM64
-  // for debug : comment it
-  #define FORCE_SWAP_MODE
-#else
-  #define k_SwapBytes_Mode_NEON 1
-#endif
-// typedef uint8x16_t v128;
-#define SWAP2_128(i)   *(uint8x16_t *)      (void *)(items + (i) * 8) = \
-      vrev16q_u8(*(const uint8x16_t *)(const void *)(items + (i) * 8));
-#define SWAP4_128(i)   *(uint8x16_t *)      (void *)(items + (i) * 4) = \
-      vrev32q_u8(*(const uint8x16_t *)(const void *)(items + (i) * 4));
-
-// Z7_NO_INLINE
-static
-#ifdef SWAP_ATTRIB_NEON
-SWAP_ATTRIB_NEON
-#endif
-Z7_ATTRIB_NO_VECTORIZE
-void
-Z7_FASTCALL
-SwapBytes2_128(CSwapUInt16 *items, const CSwapUInt16 *lim)
-{
-  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
-  do
-  {
-    SWAP2_128(0)  SWAP2_128(1)  items += 2 * 8;
-    SWAP2_128(0)  SWAP2_128(1)  items += 2 * 8;
-  }
-  while (items != lim);
-}
-
-// Z7_NO_INLINE
-static
-#ifdef SWAP_ATTRIB_NEON
-SWAP_ATTRIB_NEON
-#endif
-Z7_ATTRIB_NO_VECTORIZE
-void
-Z7_FASTCALL
-SwapBytes4_128(CSwapUInt32 *items, const CSwapUInt32 *lim)
-{
-  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
-  do
-  {
-    SWAP4_128(0)  SWAP4_128(1)  items += 2 * 4;
-    SWAP4_128(0)  SWAP4_128(1)  items += 2 * 4;
-  }
-  while (items != lim);
-}
-
-#endif // USE_SWAP_128
-
-#else // MY_CPU_ARM_OR_ARM64
-#define FORCE_SWAP_MODE
-#endif // MY_CPU_ARM_OR_ARM64
-
-
-
-
-
-
-#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_X86)
-  /* _byteswap_ushort() in MSVC x86 32-bit works via slow { mov dh, al; mov dl, ah }
-     So we use own versions of byteswap function */
-  #if (_MSC_VER < 1400 )  // old MSVC-X86 without _rotr16() support
-    #define SWAP2_16(i)  { UInt32 v = items[i];  v += (v << 16);  v >>= 8;  items[i] = (CSwapUInt16)v; }
-  #else  // is new MSVC-X86 with fast _rotr16()
-    #include <intrin.h>
-    #define SWAP2_16(i)  { items[i] = _rotr16(items[i], 8); }
-  #endif
-#else  // is not MSVC-X86
-  #define SWAP2_16(i)  { CSwapUInt16 v = items[i];  items[i] = Z7_BSWAP16(v); }
-#endif  // MSVC-X86
-
-#if defined(Z7_CPU_FAST_BSWAP_SUPPORTED)
-  #define SWAP4_32(i)  { CSwapUInt32 v = items[i];  items[i] = Z7_BSWAP32(v); }
-#else
-  #define SWAP4_32(i)  \
-    { UInt32 v = items[i]; \
-      v = ((v & 0xff00ff) << 8) + ((v >> 8) & 0xff00ff); \
-      v = rotlFixed(v, 16); \
-      items[i] = v; }
-#endif
-
-
-
-
-#if defined(FORCE_SWAP_MODE) && defined(USE_SWAP_128)
-  #define DEFAULT_Swap2  SwapBytes2_128
-  #if !defined(MY_CPU_X86_OR_AMD64)
-    #define DEFAULT_Swap4  SwapBytes4_128
-  #endif
-#endif
-
-#if !defined(DEFAULT_Swap2) || !defined(DEFAULT_Swap4)
-
-#define SWAP_BASE_FUNCS_PREFIXES \
-Z7_FORCE_INLINE  \
-static \
-Z7_ATTRIB_NO_VECTOR  \
-void Z7_FASTCALL
-
-
-#if defined(MY_CPU_ARM_OR_ARM64)
-#if defined(__clang__)
-#pragma GCC diagnostic ignored "-Wlanguage-extension-token"
-#endif
-#endif
-
-
-#ifdef MY_CPU_64BIT
-
-#if defined(MY_CPU_ARM64) \
-    && defined(__ARM_ARCH) && (__ARM_ARCH >= 8) \
-    && (  (defined(__GNUC__) && (__GNUC__ >= 4)) \
-       || (defined(__clang__) && (__clang_major__ >= 4)))
-
-  #define SWAP2_64_VAR(v)  asm ("rev16 %x0,%x0" : "+r" (v));
-  #define SWAP4_64_VAR(v)  asm ("rev32 %x0,%x0" : "+r" (v));
-
-#else  // is not ARM64-GNU
-
-#if !defined(MY_CPU_X86_OR_AMD64) || (k_SwapBytes_Mode_MIN == 0) || !defined(USE_SWAP_128)
-  #define SWAP2_64_VAR(v) \
-    v = ( 0x00ff00ff00ff00ff & (v >> 8))  \
-      + ((0x00ff00ff00ff00ff & v) << 8);
-      /* plus gives faster code in MSVC */
-#endif
-
-#ifdef Z7_CPU_FAST_BSWAP_SUPPORTED
-  #define SWAP4_64_VAR(v) \
-    v = Z7_BSWAP64(v); \
-    v = Z7_ROTL64(v, 32);
-#else
-  #define SWAP4_64_VAR(v) \
-    v = ( 0x000000ff000000ff & (v >> 24))  \
-      + ((0x000000ff000000ff & v) << 24 )  \
-      + ( 0x0000ff000000ff00 & (v >>  8))  \
-      + ((0x0000ff000000ff00 & v) <<  8 )  \
-      ;
-#endif
-
-#endif  // ARM64-GNU
-
-
-#ifdef SWAP2_64_VAR
-
-#define SWAP2_64(i) { \
-    UInt64 v = *(const UInt64 *)(const void *)(items + (i) * 4); \
-    SWAP2_64_VAR(v) \
-    *(UInt64 *)(void *)(items + (i) * 4) = v; }
-
-SWAP_BASE_FUNCS_PREFIXES
-SwapBytes2_64(CSwapUInt16 *items, const CSwapUInt16 *lim)
-{
-  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
-  do
-  {
-    SWAP2_64(0)  SWAP2_64(1)  items += 2 * 4;
-    SWAP2_64(0)  SWAP2_64(1)  items += 2 * 4;
-  }
-  while (items != lim);
-}
-
-  #define DEFAULT_Swap2  SwapBytes2_64
-  #if !defined(FORCE_SWAP_MODE)
-    #define SWAP2_DEFAULT_MODE 0
-  #endif
-#else // !defined(SWAP2_64_VAR)
-  #define DEFAULT_Swap2  SwapBytes2_128
-  #if !defined(FORCE_SWAP_MODE)
-    #define SWAP2_DEFAULT_MODE 1
-  #endif
-#endif // SWAP2_64_VAR
-
-
-#define SWAP4_64(i) { \
-    UInt64 v = *(const UInt64 *)(const void *)(items + (i) * 2); \
-    SWAP4_64_VAR(v) \
-    *(UInt64 *)(void *)(items + (i) * 2) = v; }
-
-SWAP_BASE_FUNCS_PREFIXES
-SwapBytes4_64(CSwapUInt32 *items, const CSwapUInt32 *lim)
-{
-  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
-  do
-  {
-    SWAP4_64(0)  SWAP4_64(1)  items += 2 * 2;
-    SWAP4_64(0)  SWAP4_64(1)  items += 2 * 2;
-  }
-  while (items != lim);
-}
-
-#define DEFAULT_Swap4  SwapBytes4_64
-
-#else  // is not 64BIT
-
-
-#if defined(MY_CPU_ARM_OR_ARM64) \
-    && defined(__ARM_ARCH) && (__ARM_ARCH >= 6) \
-    && (  (defined(__GNUC__) && (__GNUC__ >= 4)) \
-       || (defined(__clang__) && (__clang_major__ >= 4)))
-
-#ifdef MY_CPU_64BIT
-  #define SWAP2_32_VAR(v)  asm ("rev16 %w0,%w0" : "+r" (v));
-#else
-  #define SWAP2_32_VAR(v)  asm ("rev16 %0,%0" : "+r" (v)); // for clang/gcc
-    // asm ("rev16 %r0,%r0" : "+r" (a));  // for gcc
-#endif
-
-#elif defined(_MSC_VER) && (_MSC_VER < 1300) && defined(MY_CPU_X86) \
-    || !defined(Z7_CPU_FAST_BSWAP_SUPPORTED) \
-    || !defined(Z7_CPU_FAST_ROTATE_SUPPORTED)
-  // old msvc doesn't support _byteswap_ulong()
-  #define SWAP2_32_VAR(v) \
-    v = ((v & 0xff00ff) << 8) + ((v >> 8) & 0xff00ff);
-
-#else  // is not ARM and is not old-MSVC-X86 and fast BSWAP/ROTATE are supported
-  #define SWAP2_32_VAR(v) \
-    v = Z7_BSWAP32(v); \
-    v = rotlFixed(v, 16);
-
-#endif  // GNU-ARM*
-
-#define SWAP2_32(i) { \
-    UInt32 v = *(const UInt32 *)(const void *)(items + (i) * 2); \
-    SWAP2_32_VAR(v); \
-    *(UInt32 *)(void *)(items + (i) * 2) = v; }
-
-
-SWAP_BASE_FUNCS_PREFIXES
-SwapBytes2_32(CSwapUInt16 *items, const CSwapUInt16 *lim)
-{
-  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
-  do
-  {
-    SWAP2_32(0)  SWAP2_32(1)  items += 2 * 2;
-    SWAP2_32(0)  SWAP2_32(1)  items += 2 * 2;
-  }
-  while (items != lim);
-}
-
-
-SWAP_BASE_FUNCS_PREFIXES
-SwapBytes4_32(CSwapUInt32 *items, const CSwapUInt32 *lim)
-{
-  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
-  do
-  {
-    SWAP4_32(0)  SWAP4_32(1)  items += 2;
-    SWAP4_32(0)  SWAP4_32(1)  items += 2;
-  }
-  while (items != lim);
-}
-
-#define DEFAULT_Swap2  SwapBytes2_32
-#define DEFAULT_Swap4  SwapBytes4_32
-#if !defined(FORCE_SWAP_MODE)
-  #define SWAP2_DEFAULT_MODE 0
-#endif
-
-#endif // MY_CPU_64BIT
-#endif // if !defined(DEFAULT_Swap2) || !defined(DEFAULT_Swap4)
-
-
-
-#if !defined(FORCE_SWAP_MODE)
-static unsigned g_SwapBytes_Mode;
-#endif
-
-/* size of largest unrolled loop iteration: 128 bytes = 4 * 32 bytes (AVX). */
-#define SWAP_ITERATION_BLOCK_SIZE_MAX  (1 << 7)
-
-// 32 bytes for (AVX) or 2 * 16-bytes for NEON.
-#define SWAP_VECTOR_ALIGN_SIZE  (1 << 5)
-
-Z7_NO_INLINE
-void z7_SwapBytes2(CSwapUInt16 *items, size_t numItems)
-{
-  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
-  for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (SWAP_VECTOR_ALIGN_SIZE - 1)) != 0; numItems--)
-  {
-    SWAP2_16(0)
-    items++;
-  }
-  {
-    const size_t k_Align_Mask = SWAP_ITERATION_BLOCK_SIZE_MAX / sizeof(CSwapUInt16) - 1;
-    size_t numItems2 = numItems;
-    CSwapUInt16 *lim;
-    numItems &= k_Align_Mask;
-    numItems2 &= ~(size_t)k_Align_Mask;
-    lim = items + numItems2;
-    if (numItems2 != 0)
-    {
-     #if !defined(FORCE_SWAP_MODE)
-      #ifdef MY_CPU_X86_OR_AMD64
-        #ifdef USE_SWAP_AVX2
-          if (g_SwapBytes_Mode > k_SwapBytes_Mode_SSSE3)
-            ShufBytes_256((__m256i *)(void *)items,
-                (const __m256i *)(const void *)lim,
-                (const __m128i *)(const void *)&(k_ShufMask_Swap2[0]));
-          else
-        #endif
-        #ifdef USE_SWAP_SSSE3
-          if (g_SwapBytes_Mode >= k_SwapBytes_Mode_SSSE3)
-            ShufBytes_128((__m128i *)(void *)items,
-                (const __m128i *)(const void *)lim,
-                (const __m128i *)(const void *)&(k_ShufMask_Swap2[0]));
-          else
-        #endif
-      #endif  // MY_CPU_X86_OR_AMD64
-      #if SWAP2_DEFAULT_MODE == 0
-          if (g_SwapBytes_Mode != 0)
-            SwapBytes2_128(items, lim);
-          else
-      #endif
-     #endif // FORCE_SWAP_MODE
-            DEFAULT_Swap2(items, lim);
-    }
-    items = lim;
-  }
-  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
-  for (; numItems != 0; numItems--)
-  {
-    SWAP2_16(0)
-    items++;
-  }
-}
-
-
-Z7_NO_INLINE
-void z7_SwapBytes4(CSwapUInt32 *items, size_t numItems)
-{
-  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
-  for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (SWAP_VECTOR_ALIGN_SIZE - 1)) != 0; numItems--)
-  {
-    SWAP4_32(0)
-    items++;
-  }
-  {
-    const size_t k_Align_Mask = SWAP_ITERATION_BLOCK_SIZE_MAX / sizeof(CSwapUInt32) - 1;
-    size_t numItems2 = numItems;
-    CSwapUInt32 *lim;
-    numItems &= k_Align_Mask;
-    numItems2 &= ~(size_t)k_Align_Mask;
-    lim = items + numItems2;
-    if (numItems2 != 0)
-    {
-     #if !defined(FORCE_SWAP_MODE)
-      #ifdef MY_CPU_X86_OR_AMD64
-        #ifdef USE_SWAP_AVX2
-          if (g_SwapBytes_Mode > k_SwapBytes_Mode_SSSE3)
-            ShufBytes_256((__m256i *)(void *)items,
-                (const __m256i *)(const void *)lim,
-                (const __m128i *)(const void *)&(k_ShufMask_Swap4[0]));
-          else
-        #endif
-        #ifdef USE_SWAP_SSSE3
-          if (g_SwapBytes_Mode >= k_SwapBytes_Mode_SSSE3)
-            ShufBytes_128((__m128i *)(void *)items,
-                (const __m128i *)(const void *)lim,
-                (const __m128i *)(const void *)&(k_ShufMask_Swap4[0]));
-          else
-        #endif
-      #else  // MY_CPU_X86_OR_AMD64
-
-          if (g_SwapBytes_Mode != 0)
-            SwapBytes4_128(items, lim);
-          else
-      #endif  // MY_CPU_X86_OR_AMD64
-     #endif // FORCE_SWAP_MODE
-            DEFAULT_Swap4(items, lim);
-    }
-    items = lim;
-  }
-  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
-  for (; numItems != 0; numItems--)
-  {
-    SWAP4_32(0)
-    items++;
-  }
-}
-
-
-// #define SHOW_HW_STATUS
-
-#ifdef SHOW_HW_STATUS
-#include <stdio.h>
-#define PRF(x) x
-#else
-#define PRF(x)
-#endif
-
-void z7_SwapBytesPrepare(void)
-{
-#ifndef FORCE_SWAP_MODE
-  unsigned mode = 0; // k_SwapBytes_Mode_BASE;
-
-#ifdef MY_CPU_ARM_OR_ARM64
-  {
-    if (CPU_IsSupported_NEON())
-    {
-      // #pragma message ("=== SwapBytes NEON")
-      PRF(printf("\n=== SwapBytes NEON\n");)
-      mode = k_SwapBytes_Mode_NEON;
-    }
-  }
-#else // MY_CPU_ARM_OR_ARM64
-  {
-    #ifdef USE_SWAP_AVX2
-      if (CPU_IsSupported_AVX2())
-      {
-        // #pragma message ("=== SwapBytes AVX2")
-        PRF(printf("\n=== SwapBytes AVX2\n");)
-        mode = k_SwapBytes_Mode_AVX2;
-      }
-      else
-    #endif
-    #ifdef USE_SWAP_SSSE3
-      if (CPU_IsSupported_SSSE3())
-      {
-        // #pragma message ("=== SwapBytes SSSE3")
-        PRF(printf("\n=== SwapBytes SSSE3\n");)
-        mode = k_SwapBytes_Mode_SSSE3;
-      }
-      else
-    #endif
-    #if !defined(MY_CPU_AMD64)
-      if (CPU_IsSupported_SSE2())
-    #endif
-      {
-        // #pragma message ("=== SwapBytes SSE2")
-        PRF(printf("\n=== SwapBytes SSE2\n");)
-        mode = k_SwapBytes_Mode_SSE2;
-      }
-  }
-#endif // MY_CPU_ARM_OR_ARM64
-  g_SwapBytes_Mode = mode;
-  // g_SwapBytes_Mode = 0; // for debug
-#endif // FORCE_SWAP_MODE
-  PRF(printf("\n=== SwapBytesPrepare\n");)
-}
-
-#undef PRF
--- a/src/dep/libs/lib7z/SwapBytes.h
+++ b/src/dep/libs/lib7z/SwapBytes.h
@ -1,17 +0,0 @@
-/* SwapBytes.h -- Byte Swap conversion filter
-2023-04-02 : Igor Pavlov : Public domain */
-
-#ifndef ZIP7_INC_SWAP_BYTES_H
-#define ZIP7_INC_SWAP_BYTES_H
-
-#include "7zTypes.h"
-
-EXTERN_C_BEGIN
-
-void z7_SwapBytes2(UInt16 *data, size_t numItems);
-void z7_SwapBytes4(UInt32 *data, size_t numItems);
-void z7_SwapBytesPrepare(void);
-
-EXTERN_C_END
-
-#endif