From 10e0f1fc8a56cc5f5a5df23f0b5c27ebf57b3692 Mon Sep 17 00:00:00 2001 From: chenshi51 Date: Mon, 3 Nov 2025 15:42:05 +0800 Subject: [PATCH] upgrade lzma 25.01 Signed-off-by: chenshi51 Change-Id: I2a1f03b523c2b849043c84c28b67d45a404aefbf --- Asm/x86/Sort.asm | 860 +++++++++++++ C/7zVersion.h | 10 +- C/Compiler.h | 12 +- C/CpuArch.h | 8 + C/LzFind.c | 26 +- C/LzFindMt.c | 10 +- C/LzFindMt.h | 6 +- C/Lzma2Enc.c | 4 +- C/Lzma2Enc.h | 1 + C/LzmaEnc.c | 6 + C/LzmaEnc.h | 4 +- C/MtCoder.c | 61 +- C/MtCoder.h | 7 +- C/Sort.c | 367 ++++-- C/Sort.h | 7 +- C/Threads.c | 237 +++- C/Threads.h | 12 +- C/Util/Lzma/LzmaUtil.dsp | 4 + C/Util/LzmaLib/LzmaLib.dsp | 8 +- C/Xz.h | 12 +- C/XzCrc64Opt.c | 4 +- C/XzDec.c | 29 +- C/XzEnc.c | 8 +- C/XzEnc.h | 3 +- C/XzIn.c | 265 ++-- CPP/7zip/7zip_gcc.mak | 6 +- CPP/7zip/Archive/7z/7zCompressionMode.h | 2 + CPP/7zip/Archive/7z/7zHandlerOut.cpp | 30 +- CPP/7zip/Archive/Common/HandlerOut.cpp | 8 +- CPP/7zip/Archive/Common/HandlerOut.h | 33 +- CPP/7zip/Archive/Common/ItemNameUtils.cpp | 35 +- CPP/7zip/Archive/Common/ItemNameUtils.h | 3 + CPP/7zip/Archive/XzHandler.cpp | 14 +- CPP/7zip/Bundles/Alone7z/makefile | 2 +- CPP/7zip/Bundles/SFXSetup/SfxSetup.cpp | 4 +- CPP/7zip/Common/InBuffer.h | 10 + CPP/7zip/Common/MethodProps.cpp | 32 +- CPP/7zip/Common/MethodProps.h | 6 +- CPP/7zip/Common/OutBuffer.h | 29 +- CPP/7zip/Compress/Lzma2Encoder.cpp | 10 +- CPP/7zip/Compress/LzmaEncoder.cpp | 18 + CPP/7zip/Crypto/MyAes.cpp | 27 +- CPP/7zip/ICoder.h | 3 + CPP/7zip/Sort.mak | 6 + CPP/7zip/UI/Client7z/makefile.gcc | 2 +- CPP/7zip/UI/Common/ArchiveCommandLine.cpp | 80 +- CPP/7zip/UI/Common/ArchiveExtractCallback.cpp | 1144 +++++++++++------ CPP/7zip/UI/Common/ArchiveExtractCallback.h | 233 ++-- CPP/7zip/UI/Common/Bench.cpp | 87 +- CPP/7zip/UI/Common/EnumDirItems.cpp | 55 +- CPP/7zip/UI/Common/Extract.cpp | 2 +- CPP/7zip/UI/Common/ExtractingFilePath.cpp | 2 +- CPP/7zip/UI/Common/HashCalc.cpp | 313 +++-- CPP/7zip/UI/Common/HashCalc.h | 21 +- 
CPP/7zip/UI/Common/LoadCodecs.cpp | 6 +- CPP/7zip/UI/Common/Update.cpp | 4 +- CPP/7zip/UI/Common/Update.h | 2 + CPP/7zip/UI/Common/UpdateCallback.cpp | 110 +- CPP/7zip/UI/Console/Main.cpp | 9 +- CPP/7zip/UI/Console/makefile | 2 +- CPP/Build.mak | 8 +- CPP/Common/MyString.cpp | 29 - CPP/Common/MyString.h | 13 +- CPP/Common/Wildcard.cpp | 13 +- CPP/Windows/FileDir.cpp | 73 +- CPP/Windows/FileDir.h | 18 +- CPP/Windows/FileFind.cpp | 9 +- CPP/Windows/FileIO.h | 43 +- CPP/Windows/FileLink.cpp | 246 ++-- CPP/Windows/FileName.cpp | 71 +- CPP/Windows/FileName.h | 13 +- CPP/Windows/System.cpp | 128 +- CPP/Windows/System.h | 61 +- CPP/Windows/Thread.h | 8 +- CPP/Windows/TimeUtils.cpp | 3 +- DOC/lzma-history.txt | 17 + DOC/lzma-sdk.txt | 30 +- README.OpenSource | 4 +- 78 files changed, 3818 insertions(+), 1290 deletions(-) create mode 100644 Asm/x86/Sort.asm create mode 100644 CPP/7zip/Sort.mak diff --git a/Asm/x86/Sort.asm b/Asm/x86/Sort.asm new file mode 100644 index 0000000..e991a4a --- /dev/null +++ b/Asm/x86/Sort.asm @@ -0,0 +1,860 @@ +; SortTest.asm -- ASM version of HeapSort() function +; Igor Pavlov : Public domain + +include ../../../../Asm/x86/7zAsm.asm + +MY_ASM_START + +ifndef Z7_SORT_ASM_USE_SEGMENT +if (IS_LINUX gt 0) + ; Z7_SORT_ASM_USE_SEGMENT equ 1 +else + ; Z7_SORT_ASM_USE_SEGMENT equ 1 +endif +endif + +ifdef Z7_SORT_ASM_USE_SEGMENT +_TEXT$Z7_SORT SEGMENT ALIGN(64) 'CODE' +MY_ALIGN macro num:req + align num +endm +else +MY_ALIGN macro num:req + ; We expect that ".text" is aligned for 16-bytes. + ; So we don't need large alignment inside our function. 
+ align 16 +endm +endif + + +MY_ALIGN_16 macro + MY_ALIGN 16 +endm + +MY_ALIGN_32 macro + MY_ALIGN 32 +endm + +MY_ALIGN_64 macro + MY_ALIGN 64 +endm + +ifdef x64 + +NUM_PREFETCH_LEVELS equ 3 ; to prefetch 1x 64-bytes line (is good for most cases) +; NUM_PREFETCH_LEVELS equ 4 ; to prefetch 2x 64-bytes lines (better for big arrays) + +acc equ x0 +k equ r0 +k_x equ x0 + +p equ r1 + +s equ r2 +s_x equ x2 + +a0 equ x3 +t0 equ a0 + +a3 equ x5 +qq equ a3 + +a1 equ x6 +t1 equ a1 +t1_r equ r6 + +a2 equ x7 +t2 equ a2 + +i equ r8 +e0 equ x8 + +e1 equ x9 + +num_last equ r10 +num_last_x equ x10 + +next4_lim equ r11 +pref_lim equ r12 + + + +SORT_2_WITH_TEMP_REG macro b0, b1, temp_reg + mov temp_reg, b0 + cmp b0, b1 + cmovae b0, b1 ; min + cmovae b1, temp_reg ; max +endm + +SORT macro b0, b1 + SORT_2_WITH_TEMP_REG b0, b1, acc +endm + +LOAD macro dest:req, index:req + mov dest, [p + 4 * index] +endm + +STORE macro reg:req, index:req + mov [p + 4 * index], reg +endm + + +if (NUM_PREFETCH_LEVELS gt 3) + num_prefetches equ (1 SHL (NUM_PREFETCH_LEVELS - 3)) +else + num_prefetches equ 1 +endif + +PREFETCH_OP macro offs + cur_offset = 7 * 4 ; it's average offset in 64-bytes cache line. + ; cur_offset = 0 ; we can use zero offset, if we are sure that array is aligned for 64-bytes. 
+ rept num_prefetches + if 1 + prefetcht0 byte ptr [p + offs + cur_offset] + else + mov pref_x, dword ptr [p + offs + cur_offset] + endif + cur_offset = cur_offset + 64 + endm +endm + +PREFETCH_MY macro +if 1 + if 1 + shl k, NUM_PREFETCH_LEVELS + 3 + else + ; we delay prefetch instruction to improve main loads + shl k, NUM_PREFETCH_LEVELS + shl k, 3 + ; shl k, 0 + endif + PREFETCH_OP k +elseif 1 + shl k, 3 + PREFETCH_OP k * (1 SHL NUM_PREFETCH_LEVELS) ; change it +endif +endm + + +STEP_1 macro exit_label, prefetch_macro +use_cmov_1 equ 1 ; set 1 for cmov, but it's slower in some cases + ; set 0 for LOAD after adc s, 0 + cmp t0, t1 + if use_cmov_1 + cmovb t0, t1 + ; STORE t0, k + endif + adc s, 0 + if use_cmov_1 eq 0 + LOAD t0, s + endif + cmp qq, t0 + jae exit_label + if 1 ; use_cmov_1 eq 0 + STORE t0, k + endif + prefetch_macro + mov t0, [p + s * 8] + mov t1, [p + s * 8 + 4] + mov k, s + add s, s ; slower for some cpus + ; lea s, dword ptr [s + s] ; slower for some cpus + ; shl s, 1 ; faster for some cpus + ; lea s, dword ptr [s * 2] ; faster for some cpus + rept 0 ; 1000 for debug : 0 for normal + ; number of calls in generate_stage : ~0.6 of number of items + shl k, 0 + endm +endm + + +STEP_2 macro exit_label, prefetch_macro +use_cmov_2 equ 0 ; set 1 for cmov, but it's slower in some cases + ; set 0 for LOAD after adc s, 0 + cmp t0, t1 + if use_cmov_2 + mov t2, t0 + cmovb t2, t1 + ; STORE t2, k + endif + mov t0, [p + s * 8] + mov t1, [p + s * 8 + 4] + cmovb t0, [p + s * 8 + 8] + cmovb t1, [p + s * 8 + 12] + adc s, 0 + if use_cmov_2 eq 0 + LOAD t2, s + endif + cmp qq, t2 + jae exit_label + if 1 ; use_cmov_2 eq 0 + STORE t2, k + endif + prefetch_macro + mov k, s + ; add s, s + ; lea s, [s + s] + shl s, 1 + ; lea s, [s * 2] +endm + + +MOVE_SMALLEST_UP macro STEP, use_prefetch, num_unrolls + LOCAL exit_1, exit_2, leaves, opt_loop, last_nodes + + ; s == k * 2 + ; t0 == (p)[s] + ; t1 == (p)[s + 1] + cmp k, next4_lim + jae leaves + + rept num_unrolls + STEP exit_2 + 
cmp k, next4_lim + jae leaves + endm + + if use_prefetch + prefetch_macro equ PREFETCH_MY + pref_lim_2 equ pref_lim + ; lea pref_lim, dword ptr [num_last + 1] + ; shr pref_lim, NUM_PREFETCH_LEVELS + 1 + cmp k, pref_lim_2 + jae last_nodes + else + prefetch_macro equ + pref_lim_2 equ next4_lim + endif + +MY_ALIGN_16 +opt_loop: + STEP exit_2, prefetch_macro + cmp k, pref_lim_2 + jb opt_loop + +last_nodes: + ; k >= pref_lim_2 + ; 2 cases are possible: + ; case-1: num_after_prefetch_levels == 0 && next4_lim = pref_lim_2 + ; case-2: num_after_prefetch_levels == NUM_PREFETCH_LEVELS - 1 && + ; next4_lim = pref_lim_2 / (NUM_PREFETCH_LEVELS - 1) + if use_prefetch + yyy = NUM_PREFETCH_LEVELS - 1 + while yyy + yyy = yyy - 1 + STEP exit_2 + if yyy + cmp k, next4_lim + jae leaves + endif + endm + endif + +leaves: + ; k >= next4_lim == (num_last + 1) / 4 must be provided by previous code. + ; we have 2 nodes in (s) level : always + ; we can have some nodes in (s * 2) level : low probability case + ; we have no nodes in (s * 4) level + ; s == k * 2 + ; t0 == (p)[s] + ; t1 == (p)[s + 1] + cmp t0, t1 + cmovb t0, t1 + adc s, 0 + STORE t0, k + + ; t0 == (p)[s] + ; s / 2 == k : (s) is index of max item from (p)[k * 2], (p)[k * 2 + 1] + ; we have 3 possible cases here: + ; s * 2 > num_last : (s) node has no childs + ; s * 2 == num_last : (s) node has 1 leaf child that is last item of array + ; s * 2 < num_last : (s) node has 2 leaf childs. We provide (s * 4 > num_last) + ; we check for (s * 2 > num_last) before "cmp qq, t0" check, because + ; we will replace conditional jump with cmov instruction later. 
+ lea t1_r, dword ptr [s + s] + cmp t1_r, num_last + ja exit_1 ; if (s * 2 > num_last), we have no childs : it's high probability branch + + ; it's low probability branch + ; s * 2 <= num_last + cmp qq, t0 + jae exit_2 + + ; qq < t0, so we go to next level + ; we check 1 or 2 childs in next level + mov t0, [p + s * 8] + mov k, s + mov s, t1_r + cmp t1_r, num_last + je @F ; (s == num_last) means that we have single child in tree + + ; (s < num_last) : so we must read both childs and select max of them. + mov t1, [p + k * 8 + 4] + cmp t0, t1 + cmovb t0, t1 + adc s, 0 +@@: + STORE t0, k +exit_1: + ; t0 == (p)[s], s / 2 == k : (s) is index of max item from (p)[k * 2], (p)[k * 2 + 1] + cmp qq, t0 + cmovb k, s +exit_2: + STORE qq, k +endm + + + + +ifdef Z7_SORT_ASM_USE_SEGMENT +; MY_ALIGN_64 +else + MY_ALIGN_16 +endif + +MY_PROC HeapSort, 2 + +if (IS_LINUX gt 0) + mov p, REG_ABI_PARAM_0 ; r1 <- r7 : linux +endif + mov num_last, REG_ABI_PARAM_1 ; r10 <- r6 : linux + ; r10 <- r2 : win64 + cmp num_last, 2 + jb end_1 + + ; MY_PUSH_PRESERVED_ABI_REGS + MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 + push r12 + + cmp num_last, 4 + ja sort_5 + + LOAD a0, 0 + LOAD a1, 1 + SORT a0, a1 + cmp num_last, 3 + jb end_2 + + LOAD a2, 2 + je sort_3 + + LOAD a3, 3 + SORT a2, a3 + SORT a1, a3 + STORE a3, 3 +sort_3: + SORT a0, a2 + SORT a1, a2 + STORE a2, 2 + jmp end_2 + +sort_5: + ; (num_last > 4) is required here + ; if (num_last >= 6) : we will use optimized loop for leaf nodes loop_down_1 + mov next4_lim, num_last + shr next4_lim, 2 + + dec num_last + mov k, num_last + shr k, 1 + mov i, num_last + shr i, 2 + test num_last, 1 + jnz size_even + + ; ODD number of items. 
So we compare parent with single child + LOAD t1, num_last + LOAD t0, k + SORT_2_WITH_TEMP_REG t1, t0, t2 + STORE t1, num_last + STORE t0, k + dec k + +size_even: + cmp k, i + jbe loop_down ; jump for num_last == 4 case + +if 0 ; 1 for debug + mov r15, k + mov r14d, 1 ; 100 +loop_benchmark: +endif + ; optimized loop for leaf nodes: + mov t0, [p + k * 8] + mov t1, [p + k * 8 + 4] + +MY_ALIGN_16 +loop_down_1: + ; we compare parent with max of childs: + ; lea s, dword ptr [2 * k] + mov s, k + cmp t0, t1 + cmovb t0, t1 + adc s, s + LOAD t2, k + STORE t0, k + cmp t2, t0 + cmovae s, k + dec k + ; we preload next items before STORE operation for calculated address + mov t0, [p + k * 8] + mov t1, [p + k * 8 + 4] + STORE t2, s + cmp k, i + jne loop_down_1 + +if 0 ; 1 for debug + mov k, r15 + dec r14d + jnz loop_benchmark + ; jmp end_debug +endif + +MY_ALIGN_16 +loop_down: + mov t0, [p + i * 8] + mov t1, [p + i * 8 + 4] + LOAD qq, i + mov k, i + lea s, dword ptr [i + i] + ; jmp end_debug + DOWN_use_prefetch equ 0 + DOWN_num_unrolls equ 0 + MOVE_SMALLEST_UP STEP_1, DOWN_use_prefetch, DOWN_num_unrolls + sub i, 1 + jnb loop_down + + ; jmp end_debug + LOAD e0, 0 + LOAD e1, 1 + + LEVEL_3_LIMIT equ 8 ; 8 is default, but 7 also can work + + cmp num_last, LEVEL_3_LIMIT + 1 + jb main_loop_sort_5 + +MY_ALIGN_16 +main_loop_sort: + ; num_last > LEVEL_3_LIMIT + ; p[size--] = p[0]; + LOAD qq, num_last + STORE e0, num_last + mov e0, e1 + + mov next4_lim, num_last + shr next4_lim, 2 + mov pref_lim, num_last + shr pref_lim, NUM_PREFETCH_LEVELS + 1 + + dec num_last +if 0 ; 1 for debug + ; that optional optimization can improve the performance, if there are identical items in array + ; 3 times improvement : if all items in array are identical + ; 20% improvement : if items are different for 1 bit only + ; 1-10% improvement : if items are different for (2+) bits + ; no gain : if items are different + cmp qq, e1 + jae next_iter_main +endif + LOAD e1, 2 + LOAD t0, 3 + mov k_x, 2 + cmp e1, t0 + 
cmovb e1, t0 + mov t0, [p + 4 * (4 + 0)] + mov t1, [p + 4 * (4 + 1)] + cmovb t0, [p + 4 * (4 + 2)] + cmovb t1, [p + 4 * (4 + 3)] + adc k_x, 0 + ; (qq <= e1), because the tree is correctly sorted + ; also here we could check (qq >= e1) or (qq == e1) for faster exit + lea s, dword ptr [k + k] + MAIN_use_prefetch equ 1 + MAIN_num_unrolls equ 0 + MOVE_SMALLEST_UP STEP_2, MAIN_use_prefetch, MAIN_num_unrolls + +next_iter_main: + cmp num_last, LEVEL_3_LIMIT + jne main_loop_sort + + ; num_last == LEVEL_3_LIMIT +main_loop_sort_5: + ; 4 <= num_last <= LEVEL_3_LIMIT + ; p[size--] = p[0]; + LOAD qq, num_last + STORE e0, num_last + mov e0, e1 + dec num_last_x + + LOAD e1, 2 + LOAD t0, 3 + mov k_x, 2 + cmp e1, t0 + cmovb e1, t0 + adc k_x, 0 + + lea s_x, dword ptr [k * 2] + cmp s_x, num_last_x + ja exit_2 + + mov t0, [p + k * 8] + je exit_1 + + ; s < num_last + mov t1, [p + k * 8 + 4] + cmp t0, t1 + cmovb t0, t1 + adc s_x, 0 +exit_1: + STORE t0, k + cmp qq, t0 + cmovb k_x, s_x +exit_2: + STORE qq, k + cmp num_last_x, 3 + jne main_loop_sort_5 + + ; num_last == 3 (real_size == 4) + LOAD a0, 2 + LOAD a1, 3 + STORE e1, 2 + STORE e0, 3 + SORT a0, a1 +end_2: + STORE a0, 0 + STORE a1, 1 +; end_debug: + ; MY_POP_PRESERVED_ABI_REGS + pop r12 + MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 +end_1: +MY_ENDP + + + +else +; ------------ x86 32-bit ------------ + +ifdef x64 +IS_CDECL = 0 +endif + +acc equ x0 +k equ r0 +k_x equ acc + +p equ r1 + +num_last equ r2 +num_last_x equ x2 + +a0 equ x3 +t0 equ a0 + +a3 equ x5 +i equ r5 +e0 equ a3 + +a1 equ x6 +qq equ a1 + +a2 equ x7 +s equ r7 +s_x equ a2 + + +SORT macro b0, b1 + cmp b1, b0 + jae @F + if 1 + xchg b0, b1 + else + mov acc, b0 + mov b0, b1 ; min + mov b1, acc ; max + endif +@@: +endm + +LOAD macro dest:req, index:req + mov dest, [p + 4 * index] +endm + +STORE macro reg:req, index:req + mov [p + 4 * index], reg +endm + + +STEP_1 macro exit_label + mov t0, [p + k * 8] + cmp t0, [p + k * 8 + 4] + adc s, 0 + LOAD t0, s + STORE t0, k ; we 
lookahead storing for most expected branch + cmp qq, t0 + jae exit_label + ; STORE t0, k ; use if + mov k, s + add s, s + ; lea s, dword ptr [s + s] + ; shl s, 1 + ; lea s, dword ptr [s * 2] +endm + +STEP_BRANCH macro exit_label + mov t0, [p + k * 8] + cmp t0, [p + k * 8 + 4] + jae @F + inc s + mov t0, [p + k * 8 + 4] +@@: + cmp qq, t0 + jae exit_label + STORE t0, k + mov k, s + add s, s +endm + + + +MOVE_SMALLEST_UP macro STEP, num_unrolls, exit_2 + LOCAL leaves, opt_loop, single + + ; s == k * 2 + rept num_unrolls + cmp s, num_last + jae leaves + STEP_1 exit_2 + endm + cmp s, num_last + jb opt_loop + +leaves: + ; (s >= num_last) + jne exit_2 +single: + ; (s == num_last) + mov t0, [p + k * 8] + cmp qq, t0 + jae exit_2 + STORE t0, k + mov k, s + jmp exit_2 + +MY_ALIGN_16 +opt_loop: + STEP exit_2 + cmp s, num_last + jb opt_loop + je single +exit_2: + STORE qq, k +endm + + + + +ifdef Z7_SORT_ASM_USE_SEGMENT +; MY_ALIGN_64 +else + MY_ALIGN_16 +endif + +MY_PROC HeapSort, 2 + ifdef x64 + if (IS_LINUX gt 0) + mov num_last, REG_ABI_PARAM_1 ; r2 <- r6 : linux + mov p, REG_ABI_PARAM_0 ; r1 <- r7 : linux + endif + elseif (IS_CDECL gt 0) + mov num_last, [r4 + REG_SIZE * 2] + mov p, [r4 + REG_SIZE * 1] + endif + cmp num_last, 2 + jb end_1 + MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 + + cmp num_last, 4 + ja sort_5 + + LOAD a0, 0 + LOAD a1, 1 + SORT a0, a1 + cmp num_last, 3 + jb end_2 + + LOAD a2, 2 + je sort_3 + + LOAD a3, 3 + SORT a2, a3 + SORT a1, a3 + STORE a3, 3 +sort_3: + SORT a0, a2 + SORT a1, a2 + STORE a2, 2 + jmp end_2 + +sort_5: + ; num_last > 4 + lea i, dword ptr [num_last - 2] + dec num_last + test i, 1 + jz loop_down + + ; single child + mov t0, [p + num_last * 4] + mov qq, [p + num_last * 2] + dec i + cmp qq, t0 + jae loop_down + + mov [p + num_last * 2], t0 + mov [p + num_last * 4], qq + +MY_ALIGN_16 +loop_down: + mov t0, [p + i * 4] + cmp t0, [p + i * 4 + 4] + mov k, i + mov qq, [p + i * 2] + adc k, 0 + LOAD t0, k + cmp qq, t0 + jae down_next + mov [p + i * 
2], t0 + lea s, dword ptr [k + k] + + DOWN_num_unrolls equ 0 + MOVE_SMALLEST_UP STEP_1, DOWN_num_unrolls, down_exit_label +down_next: + sub i, 2 + jnb loop_down + ; jmp end_debug + + LOAD e0, 0 + +MY_ALIGN_16 +main_loop_sort: + ; num_last > 3 + mov t0, [p + 2 * 4] + cmp t0, [p + 3 * 4] + LOAD qq, num_last + STORE e0, num_last + LOAD e0, 1 + mov s_x, 2 + mov k_x, 1 + adc s, 0 + LOAD t0, s + dec num_last + cmp qq, t0 + jae main_exit_label + STORE t0, 1 + mov k, s + add s, s + if 1 + ; for branch data prefetch mode : + ; it's faster for large arrays : larger than (1 << 13) items. + MAIN_num_unrolls equ 10 + STEP_LOOP equ STEP_BRANCH + else + MAIN_num_unrolls equ 0 + STEP_LOOP equ STEP_1 + endif + + MOVE_SMALLEST_UP STEP_LOOP, MAIN_num_unrolls, main_exit_label + + ; jmp end_debug + cmp num_last, 3 + jne main_loop_sort + + ; num_last == 3 (real_size == 4) + LOAD a0, 2 + LOAD a1, 3 + LOAD a2, 1 + STORE e0, 3 ; e0 is alias for a3 + STORE a2, 2 + SORT a0, a1 +end_2: + STORE a0, 0 + STORE a1, 1 +; end_debug: + MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 +end_1: +MY_ENDP + +endif + +ifdef Z7_SORT_ASM_USE_SEGMENT +_TEXT$Z7_SORT ENDS +endif + +if 0 +LEA_IS_D8 (R64) [R2 * 4 + 16] + Lat : TP + 2 : 1 : adl-e + 2 : 3 p056 adl-p + 1 : 2 : p15 hsw-rocket + 1 : 2 : p01 snb-ivb + 1 : 1 : p1 conroe-wsm + 1 : 4 : zen3,zen4 + 2 : 4 : zen1,zen2 + +LEA_B_IS (R64) [R2 + R3 * 4] + Lat : TP + 1 : 1 : adl-e + 2 : 3 p056 adl-p + 1 : 2 : p15 hsw-rocket + 1 : 2 : p01 snb-ivb + 1 : 1 : p1 nhm-wsm + 1 : 1 : p0 conroe-wsm + 1 : 4 : zen3,zen4 + 2 :2,4 : zen1,zen2 + +LEA_B_IS_D8 (R64) [R2 + R3 * 4 + 16] + Lat : TP + 2 : 1 : adl-e + 2 : 3 p056 adl-p + 1 : 2 : p15 ice-rocket + 3 : 1 : p1/p15 hsw-rocket + 3 : 1 : p01 snb-ivb + 1 : 1 : p1 nhm-wsm + 1 : 1 : p0 conroe-wsm + 2,1 : 2 : zen3,zen4 + 2 : 2 : zen1,zen2 + +CMOVB (R64, R64) + Lat : TP + 1,2 : 2 : adl-e + 1 : 2 p06 adl-p + 1 : 2 : p06 bwd-rocket + 1,2 : 2 : p0156+p06 hsw + 1,2 :1.5 : p015+p05 snb-ivb + 1,2 : 1 : p015+p05 nhm + 1 : 1 : 2*p015 
conroe + 1 : 2 : zen3,zen4 + 1 : 4 : zen1,zen2 + +ADC (R64, 0) + Lat : TP + 1,2 : 2 : adl-e + 1 : 2 p06 adl-p + 1 : 2 : p06 bwd-rocket + 1 :1.5 : p0156+p06 hsw + 1 :1.5 : p015+p05 snb-ivb + 2 : 1 : 2*p015 conroe-wstm + 1 : 2 : zen1,zen2,zen3,zen4 + +PREFETCHNTA : fetch data into non-temporal cache close to the processor, minimizing cache pollution. + L1 : Pentium3 + L2 : NetBurst + L1, not L2: Core duo, Core 2, Atom processors + L1, not L2, may fetch into L3 with fast replacement: Nehalem, Westmere, Sandy Bridge, ... + NEHALEM: Fills L1/L3, L1 LRU is not updated + L3 with fast replacement: Xeon Processors based on Nehalem, Westmere, Sandy Bridge, ... +PREFETCHT0 : fetch data into all cache levels. +PREFETCHT1 : fetch data into L2 and L3 +endif + +end diff --git a/C/7zVersion.h b/C/7zVersion.h index 2416c7d..43a13ae 100644 --- a/C/7zVersion.h +++ b/C/7zVersion.h @@ -1,7 +1,7 @@ -#define MY_VER_MAJOR 24 -#define MY_VER_MINOR 9 +#define MY_VER_MAJOR 25 +#define MY_VER_MINOR 1 #define MY_VER_BUILD 0 -#define MY_VERSION_NUMBERS "24.09" +#define MY_VERSION_NUMBERS "25.01" #define MY_VERSION MY_VERSION_NUMBERS #ifdef MY_CPU_NAME @@ -10,12 +10,12 @@ #define MY_VERSION_CPU MY_VERSION #endif -#define MY_DATE "2024-11-29" +#define MY_DATE "2025-08-03" #undef MY_COPYRIGHT #undef MY_VERSION_COPYRIGHT_DATE #define MY_AUTHOR_NAME "Igor Pavlov" #define MY_COPYRIGHT_PD "Igor Pavlov : Public domain" -#define MY_COPYRIGHT_CR "Copyright (c) 1999-2024 Igor Pavlov" +#define MY_COPYRIGHT_CR "Copyright (c) 1999-2025 Igor Pavlov" #ifdef USE_COPYRIGHT_CR #define MY_COPYRIGHT MY_COPYRIGHT_CR diff --git a/C/Compiler.h b/C/Compiler.h index 6f8db4c..3ede5b6 100644 --- a/C/Compiler.h +++ b/C/Compiler.h @@ -1,5 +1,5 @@ /* Compiler.h : Compiler specific defines and pragmas -2024-01-22 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_COMPILER_H #define ZIP7_INC_COMPILER_H @@ -183,6 +183,16 @@ typedef void (*Z7_void_Function)(void); #define Z7_ATTRIB_NO_VECTORIZE 
#endif +#if defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1920) + #define Z7_PRAGMA_OPTIMIZE_FOR_CODE_SIZE _Pragma("optimize ( \"s\", on )") + #define Z7_PRAGMA_OPTIMIZE_DEFAULT _Pragma("optimize ( \"\", on )") +#else + #define Z7_PRAGMA_OPTIMIZE_FOR_CODE_SIZE + #define Z7_PRAGMA_OPTIMIZE_DEFAULT +#endif + + + #if defined(MY_CPU_X86_OR_AMD64) && ( \ defined(__clang__) && (__clang_major__ >= 4) \ || defined(__GNUC__) && (__GNUC__ >= 5)) diff --git a/C/CpuArch.h b/C/CpuArch.h index 699aab4..1afc179 100644 --- a/C/CpuArch.h +++ b/C/CpuArch.h @@ -47,6 +47,12 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #define MY_CPU_SIZEOF_POINTER 4 #endif +#if defined(__SSE2__) \ + || defined(MY_CPU_AMD64) \ + || defined(_M_IX86_FP) && (_M_IX86_FP >= 2) +#define MY_CPU_SSE2 +#endif + #if defined(_M_ARM64) \ || defined(_M_ARM64EC) \ @@ -571,10 +577,12 @@ problem-4 : performace: #define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v) #define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) #define Z7_CONV_NATIVE_TO_BE_32(v) (v) +// #define Z7_GET_NATIVE16_FROM_2_BYTES(b0, b1) ((b1) | ((b0) << 8)) #elif defined(MY_CPU_LE) #define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) #define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v) #define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v) +// #define Z7_GET_NATIVE16_FROM_2_BYTES(b0, b1) ((b0) | ((b1) << 8)) #else #error Stop_Compiling_Unknown_Endian_CONV #endif diff --git a/C/LzFind.c b/C/LzFind.c index 8975fc2..434bfc2 100644 --- a/C/LzFind.c +++ b/C/LzFind.c @@ -1,5 +1,5 @@ /* LzFind.c -- Match finder for LZ algorithms -2024-03-01 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -404,7 +404,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, const unsigned nbMax = (p->numHashBytes == 2 ? 16 : (p->numHashBytes == 3 ? 
24 : 32)); - if (numBits > nbMax) + if (numBits >= nbMax) numBits = nbMax; if (numBits >= 32) hs = (UInt32)0 - 1; @@ -416,14 +416,14 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, hs |= (256 << kLzHash_CrcShift_2) - 1; { const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize); - if (hs > hs2) + if (hs >= hs2) hs = hs2; } hsCur = hs; if (p->expectedDataSize < historySize) { const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize); - if (hsCur > hs2) + if (hsCur >= hs2) hsCur = hs2; } } @@ -434,7 +434,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, if (p->expectedDataSize < historySize) { hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize); - if (hsCur > hs) // is it possible? + if (hsCur >= hs) // is it possible? hsCur = hs; } } @@ -598,7 +598,7 @@ void MatchFinder_Init(void *_p) #ifdef MY_CPU_X86_OR_AMD64 #if defined(__clang__) && (__clang_major__ >= 4) \ - || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701) + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) // || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) #define USE_LZFIND_SATUR_SUB_128 @@ -890,7 +890,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, return d; { const Byte *pb = cur - delta; - curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; + curMatch = son[_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)]; if (pb[maxLen] == cur[maxLen] && *pb == *cur) { UInt32 len = 0; @@ -925,7 +925,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, break; { ptrdiff_t diff; - curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; + curMatch = son[_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? 
_cyclicBufferSize : 0)]; diff = (ptrdiff_t)0 - (ptrdiff_t)delta; if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff]) { @@ -972,7 +972,7 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt // if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; } cmCheck = (UInt32)(pos - _cyclicBufferSize); - if ((UInt32)pos <= _cyclicBufferSize) + if ((UInt32)pos < _cyclicBufferSize) cmCheck = 0; if (cmCheck < curMatch) @@ -980,7 +980,7 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt { const UInt32 delta = pos - curMatch; { - CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)) << 1); const Byte *pb = cur - delta; unsigned len = (len0 < len1 ? len0 : len1); const UInt32 pair0 = pair[0]; @@ -1039,7 +1039,7 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const UInt32 cmCheck; cmCheck = (UInt32)(pos - _cyclicBufferSize); - if ((UInt32)pos <= _cyclicBufferSize) + if ((UInt32)pos < _cyclicBufferSize) cmCheck = 0; if (// curMatch >= pos || // failure @@ -1048,7 +1048,7 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const { const UInt32 delta = pos - curMatch; { - CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)) << 1); const Byte *pb = cur - delta; unsigned len = (len0 < len1 ? len0 : len1); if (pb[len] == cur[len]) @@ -1595,7 +1595,7 @@ static void Bt5_MatchFinder_Skip(void *_p, UInt32 num) UInt32 pos = p->pos; \ UInt32 num2 = num; \ /* (p->pos == p->posLimit) is not allowed here !!! 
*/ \ - { const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \ + { const UInt32 rem = p->posLimit - pos; if (num2 >= rem) num2 = rem; } \ num -= num2; \ { const UInt32 cycPos = p->cyclicBufferPos; \ son = p->son + cycPos; \ diff --git a/C/LzFindMt.c b/C/LzFindMt.c index 260e321..a79f8e5 100644 --- a/C/LzFindMt.c +++ b/C/LzFindMt.c @@ -1,5 +1,5 @@ /* LzFindMt.c -- multithreaded Match finder for LZ algorithms -2024-01-22 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -82,6 +82,8 @@ extern UInt64 g_NumIters_Bytes; Z7_NO_INLINE static void MtSync_Construct(CMtSync *p) { + p->affinityGroup = -1; + p->affinityInGroup = 0; p->affinity = 0; p->wasCreated = False; p->csWasInitialized = False; @@ -259,6 +261,12 @@ static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void * // return ERROR_TOO_MANY_POSTS; // for debug // return EINVAL; // for debug +#ifdef _WIN32 + if (p->affinityGroup >= 0) + wres = Thread_Create_With_Group(&p->thread, startAddress, obj, + (unsigned)(UInt32)p->affinityGroup, (CAffinityMask)p->affinityInGroup); + else +#endif if (p->affinity != 0) wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity); else diff --git a/C/LzFindMt.h b/C/LzFindMt.h index 0eb2a43..7e3ab28 100644 --- a/C/LzFindMt.h +++ b/C/LzFindMt.h @@ -1,5 +1,5 @@ /* LzFindMt.h -- multithreaded Match finder for LZ algorithms -2024-01-22 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_LZ_FIND_MT_H #define ZIP7_INC_LZ_FIND_MT_H @@ -12,8 +12,10 @@ EXTERN_C_BEGIN typedef struct { UInt32 numProcessedBlocks; - CThread thread; + Int32 affinityGroup; + UInt64 affinityInGroup; UInt64 affinity; + CThread thread; BoolInt wasCreated; BoolInt needStart; diff --git a/C/Lzma2Enc.c b/C/Lzma2Enc.c index 85aa80d..6848006 100644 --- a/C/Lzma2Enc.c +++ b/C/Lzma2Enc.c @@ -1,5 +1,5 @@ /* Lzma2Enc.c -- LZMA2 Encoder -2023-04-13 : Igor Pavlov : Public domain */ +: 
Igor Pavlov : Public domain */ #include "Precomp.h" @@ -235,6 +235,7 @@ void Lzma2EncProps_Init(CLzma2EncProps *p) p->numBlockThreads_Reduced = -1; p->numBlockThreads_Max = -1; p->numTotalThreads = -1; + p->numThreadGroups = 0; } void Lzma2EncProps_Normalize(CLzma2EncProps *p) @@ -781,6 +782,7 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle p, } p->mtCoder.numThreadsMax = (unsigned)p->props.numBlockThreads_Max; + p->mtCoder.numThreadGroups = p->props.numThreadGroups; p->mtCoder.expectedDataSize = p->expectedDataSize; { diff --git a/C/Lzma2Enc.h b/C/Lzma2Enc.h index bead0fc..2588db7 100644 --- a/C/Lzma2Enc.h +++ b/C/Lzma2Enc.h @@ -18,6 +18,7 @@ typedef struct int numBlockThreads_Reduced; int numBlockThreads_Max; int numTotalThreads; + unsigned numThreadGroups; // 0 : no groups } CLzma2EncProps; void Lzma2EncProps_Init(CLzma2EncProps *p); diff --git a/C/LzmaEnc.c b/C/LzmaEnc.c index 166e01b..cd1cd0e 100644 --- a/C/LzmaEnc.c +++ b/C/LzmaEnc.c @@ -62,7 +62,9 @@ void LzmaEncProps_Init(CLzmaEncProps *p) p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; p->numHashOutBits = 0; p->writeEndMark = 0; + p->affinityGroup = -1; p->affinity = 0; + p->affinityInGroup = 0; } void LzmaEncProps_Normalize(CLzmaEncProps *p) @@ -598,6 +600,10 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props2) p->multiThread = (props.numThreads > 1); p->matchFinderMt.btSync.affinity = p->matchFinderMt.hashSync.affinity = props.affinity; + p->matchFinderMt.btSync.affinityGroup = + p->matchFinderMt.hashSync.affinityGroup = props.affinityGroup; + p->matchFinderMt.btSync.affinityInGroup = + p->matchFinderMt.hashSync.affinityInGroup = props.affinityInGroup; #endif return SZ_OK; diff --git a/C/LzmaEnc.h b/C/LzmaEnc.h index 08711cb..a8d629e 100644 --- a/C/LzmaEnc.h +++ b/C/LzmaEnc.h @@ -1,5 +1,5 @@ /* LzmaEnc.h -- LZMA Encoder -2023-04-13 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_LZMA_ENC_H #define 
ZIP7_INC_LZMA_ENC_H @@ -29,11 +29,13 @@ typedef struct int numThreads; /* 1 or 2, default = 2 */ // int _pad; + Int32 affinityGroup; UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. Encoder uses this value to reduce dictionary size */ UInt64 affinity; + UInt64 affinityInGroup; } CLzmaEncProps; void LzmaEncProps_Init(CLzmaEncProps *p); diff --git a/C/MtCoder.c b/C/MtCoder.c index 10ad582..e2e45a6 100644 --- a/C/MtCoder.c +++ b/C/MtCoder.c @@ -1,5 +1,5 @@ /* MtCoder.c -- Multi-thread Coder -2023-09-07 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -39,14 +39,28 @@ void MtProgressThunk_CreateVTable(CMtProgressThunk *p) static THREAD_FUNC_DECL ThreadFunc(void *pp); -static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t) +static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t +#ifdef _WIN32 + , CMtCoder * const mtc +#endif + ) { WRes wres = AutoResetEvent_OptCreate_And_Reset(&t->startEvent); + // printf("\n====== MtCoderThread_CreateAndStart : \n"); if (wres == 0) { t->stop = False; if (!Thread_WasCreated(&t->thread)) - wres = Thread_Create(&t->thread, ThreadFunc, t); + { +#ifdef _WIN32 + if (mtc->numThreadGroups) + wres = Thread_Create_With_Group(&t->thread, ThreadFunc, t, + ThreadNextGroup_GetNext(&mtc->nextGroup), // group + 0); // affinityMask + else +#endif + wres = Thread_Create(&t->thread, ThreadFunc, t); + } if (wres == 0) wres = Event_Set(&t->startEvent); } @@ -56,6 +70,7 @@ static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t) } +Z7_FORCE_INLINE static void MtCoderThread_Destruct(CMtCoderThread *t) { if (Thread_WasCreated(&t->thread)) @@ -85,7 +100,7 @@ static void MtCoderThread_Destruct(CMtCoderThread *t) static SRes ThreadFunc2(CMtCoderThread *t) { - CMtCoder *mtc = t->mtCoder; + CMtCoder * const mtc = t->mtCoder; for (;;) { @@ -185,7 +200,11 @@ static SRes ThreadFunc2(CMtCoderThread *t) if (mtc->numStartedThreads < mtc->numStartedThreadsLimit && 
mtc->expectedDataSize != readProcessed) { - res = MtCoderThread_CreateAndStart(&mtc->threads[mtc->numStartedThreads]); + res = MtCoderThread_CreateAndStart(&mtc->threads[mtc->numStartedThreads] +#ifdef _WIN32 + , mtc +#endif + ); if (res == SZ_OK) mtc->numStartedThreads++; else @@ -221,7 +240,7 @@ static SRes ThreadFunc2(CMtCoderThread *t) } { - CMtCoderBlock *block = &mtc->blocks[bi]; + CMtCoderBlock * const block = &mtc->blocks[bi]; block->res = res; block->bufIndex = bufIndex; block->finished = finished; @@ -311,7 +330,7 @@ static SRes ThreadFunc2(CMtCoderThread *t) static THREAD_FUNC_DECL ThreadFunc(void *pp) { - CMtCoderThread *t = (CMtCoderThread *)pp; + CMtCoderThread * const t = (CMtCoderThread *)pp; for (;;) { if (Event_Wait(&t->startEvent) != 0) @@ -319,7 +338,7 @@ static THREAD_FUNC_DECL ThreadFunc(void *pp) if (t->stop) return 0; { - SRes res = ThreadFunc2(t); + const SRes res = ThreadFunc2(t); CMtCoder *mtc = t->mtCoder; if (res != SZ_OK) { @@ -328,7 +347,7 @@ static THREAD_FUNC_DECL ThreadFunc(void *pp) #ifndef MTCODER_USE_WRITE_THREAD { - unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads); + const unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads); if (numFinished == mtc->numStartedThreads) if (Event_Set(&mtc->finishedEvent) != 0) return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD; @@ -346,6 +365,7 @@ void MtCoder_Construct(CMtCoder *p) p->blockSize = 0; p->numThreadsMax = 0; + p->numThreadGroups = 0; p->expectedDataSize = (UInt64)(Int64)-1; p->inStream = NULL; @@ -429,6 +449,8 @@ SRes MtCoder_Code(CMtCoder *p) unsigned i; SRes res = SZ_OK; + // printf("\n====== MtCoder_Code : \n"); + if (numThreads > MTCODER_THREADS_MAX) numThreads = MTCODER_THREADS_MAX; numBlocksMax = MTCODER_GET_NUM_BLOCKS_FROM_THREADS(numThreads); @@ -492,11 +514,22 @@ SRes MtCoder_Code(CMtCoder *p) p->numStartedThreadsLimit = numThreads; p->numStartedThreads = 0; + ThreadNextGroup_Init(&p->nextGroup, p->numThreadGroups, 0); // 
startGroup // for (i = 0; i < numThreads; i++) { + // here we create new thread for first block. + // And each new thread will create another new thread after block reading + // until numStartedThreadsLimit is reached. CMtCoderThread *nextThread = &p->threads[p->numStartedThreads++]; - RINOK(MtCoderThread_CreateAndStart(nextThread)) + { + const SRes res2 = MtCoderThread_CreateAndStart(nextThread +#ifdef _WIN32 + , p +#endif + ); + RINOK(res2) + } } RINOK_THREAD(Event_Set(&p->readEvent)) @@ -513,9 +546,9 @@ SRes MtCoder_Code(CMtCoder *p) RINOK_THREAD(Event_Wait(&p->writeEvents[bi])) { - const CMtCoderBlock *block = &p->blocks[bi]; - unsigned bufIndex = block->bufIndex; - BoolInt finished = block->finished; + const CMtCoderBlock * const block = &p->blocks[bi]; + const unsigned bufIndex = block->bufIndex; + const BoolInt finished = block->finished; if (res == SZ_OK && block->res != SZ_OK) res = block->res; @@ -545,7 +578,7 @@ SRes MtCoder_Code(CMtCoder *p) } #else { - WRes wres = Event_Wait(&p->finishedEvent); + const WRes wres = Event_Wait(&p->finishedEvent); res = MY_SRes_HRESULT_FROM_WRes(wres); } #endif diff --git a/C/MtCoder.h b/C/MtCoder.h index c031fe0..2b182f5 100644 --- a/C/MtCoder.h +++ b/C/MtCoder.h @@ -1,5 +1,5 @@ /* MtCoder.h -- Multi-thread Coder -2023-04-13 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_MT_CODER_H #define ZIP7_INC_MT_CODER_H @@ -16,7 +16,7 @@ EXTERN_C_BEGIN #ifndef Z7_ST #define MTCODER_GET_NUM_BLOCKS_FROM_THREADS(numThreads) ((numThreads) + (numThreads) / 8 + 1) - #define MTCODER_THREADS_MAX 64 + #define MTCODER_THREADS_MAX 256 #define MTCODER_BLOCKS_MAX (MTCODER_GET_NUM_BLOCKS_FROM_THREADS(MTCODER_THREADS_MAX) + 3) #else #define MTCODER_THREADS_MAX 1 @@ -77,6 +77,7 @@ typedef struct CMtCoder_ size_t blockSize; /* size of input block */ unsigned numThreadsMax; + unsigned numThreadGroups; UInt64 expectedDataSize; ISeqInStreamPtr inStream; @@ -125,6 +126,8 @@ typedef struct CMtCoder_ CMtProgress 
mtProgress; CMtCoderBlock blocks[MTCODER_BLOCKS_MAX]; CMtCoderThread threads[MTCODER_THREADS_MAX]; + + CThreadNextGroup nextGroup; } CMtCoder; diff --git a/C/Sort.c b/C/Sort.c index 73dcbf0..0c30ca8 100644 --- a/C/Sort.c +++ b/C/Sort.c @@ -1,141 +1,268 @@ /* Sort.c -- Sort functions -2014-04-05 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" #include "Sort.h" +#include "CpuArch.h" -#define HeapSortDown(p, k, size, temp) \ - { for (;;) { \ - size_t s = (k << 1); \ - if (s > size) break; \ - if (s < size && p[s + 1] > p[s]) s++; \ - if (temp >= p[s]) break; \ - p[k] = p[s]; k = s; \ - } p[k] = temp; } +#if ( (defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \ + || (defined(__clang__) && Z7_has_builtin(__builtin_prefetch)) \ + ) +// the code with prefetch is slow for small arrays on x86. +// So we disable prefetch for x86. +#ifndef MY_CPU_X86 + // #pragma message("Z7_PREFETCH : __builtin_prefetch") + #define Z7_PREFETCH(a) __builtin_prefetch((a)) +#endif -void HeapSort(UInt32 *p, size_t size) -{ - if (size <= 1) - return; - p--; - { - size_t i = size / 2; - do - { - UInt32 temp = p[i]; - size_t k = i; - HeapSortDown(p, k, size, temp) - } - while (--i != 0); - } - /* - do - { - size_t k = 1; - UInt32 temp = p[size]; - p[size--] = p[1]; - HeapSortDown(p, k, size, temp) - } - while (size > 1); - */ - while (size > 3) - { - UInt32 temp = p[size]; - size_t k = (p[3] > p[2]) ? 
3 : 2; - p[size--] = p[1]; - p[1] = p[k]; - HeapSortDown(p, k, size, temp) - } - { - UInt32 temp = p[size]; - p[size] = p[1]; - if (size > 2 && p[2] < temp) - { - p[1] = p[2]; - p[2] = temp; - } - else - p[1] = temp; - } -} +#elif defined(_WIN32) // || defined(_MSC_VER) && (_MSC_VER >= 1200) -void HeapSort64(UInt64 *p, size_t size) -{ - if (size <= 1) - return; - p--; - { - size_t i = size / 2; - do - { - UInt64 temp = p[i]; - size_t k = i; - HeapSortDown(p, k, size, temp) - } - while (--i != 0); - } - /* - do - { - size_t k = 1; - UInt64 temp = p[size]; - p[size--] = p[1]; - HeapSortDown(p, k, size, temp) - } - while (size > 1); - */ - while (size > 3) - { - UInt64 temp = p[size]; - size_t k = (p[3] > p[2]) ? 3 : 2; - p[size--] = p[1]; - p[1] = p[k]; - HeapSortDown(p, k, size, temp) - } - { - UInt64 temp = p[size]; - p[size] = p[1]; - if (size > 2 && p[2] < temp) - { - p[1] = p[2]; - p[2] = temp; - } - else - p[1] = temp; - } -} +#include "7zWindows.h" + +// NOTE: CLANG/GCC/MSVC can define different values for _MM_HINT_T0 / PF_TEMPORAL_LEVEL_1. +// For example, clang-cl can generate "prefetcht2" instruction for +// PreFetchCacheLine(PF_TEMPORAL_LEVEL_1) call. +// But we want to generate "prefetcht0" instruction. +// So for CLANG/GCC we must use __builtin_prefetch() in code branch above +// instead of PreFetchCacheLine() / _mm_prefetch(). + +// New msvc-x86 compiler generates "prefetcht0" instruction for PreFetchCacheLine() call. +// But old x86 cpus don't support "prefetcht0". +// So we will use PreFetchCacheLine(), only if we are sure that +// generated instruction is supported by all cpus of that isa. 
+#if defined(MY_CPU_AMD64) \ + || defined(MY_CPU_ARM64) \ + || defined(MY_CPU_IA64) +// we need to use additional braces for (a) in PreFetchCacheLine call, because +// PreFetchCacheLine macro doesn't use braces: +// #define PreFetchCacheLine(l, a) _mm_prefetch((CHAR CONST *) a, l) + // #pragma message("Z7_PREFETCH : PreFetchCacheLine") + #define Z7_PREFETCH(a) PreFetchCacheLine(PF_TEMPORAL_LEVEL_1, (a)) +#endif + +#endif // _WIN32 + + +#define PREFETCH_NO(p,k,s,size) + +#ifndef Z7_PREFETCH + #define SORT_PREFETCH(p,k,s,size) +#else + +// #define PREFETCH_LEVEL 2 // use it if cache line is 32-bytes +#define PREFETCH_LEVEL 3 // it is fast for most cases (64-bytes cache line prefetch) +// #define PREFETCH_LEVEL 4 // it can be faster for big array (128-bytes prefetch) + +#if PREFETCH_LEVEL == 0 + + #define SORT_PREFETCH(p,k,s,size) + +#else // PREFETCH_LEVEL != 0 /* -#define HeapSortRefDown(p, vals, n, size, temp) \ - { size_t k = n; UInt32 val = vals[temp]; for (;;) { \ - size_t s = (k << 1); \ - if (s > size) break; \ - if (s < size && vals[p[s + 1]] > vals[p[s]]) s++; \ - if (val >= vals[p[s]]) break; \ - p[k] = p[s]; k = s; \ - } p[k] = temp; } +if defined(USE_PREFETCH_FOR_ALIGNED_ARRAY) + we prefetch one value per cache line. + Use it if array is aligned for cache line size (64 bytes) + or if array is small (less than L1 cache size). -void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size) +if !defined(USE_PREFETCH_FOR_ALIGNED_ARRAY) + we prefetch all cache lines that can be required. + it can be faster for big unaligned arrays.
+*/ + #define USE_PREFETCH_FOR_ALIGNED_ARRAY + +// s == k * 2 +#if 0 && PREFETCH_LEVEL <= 3 && defined(MY_CPU_X86_OR_AMD64) + // x86 supports (lea r1*8+offset) + #define PREFETCH_OFFSET(k,s) ((s) << PREFETCH_LEVEL) +#else + #define PREFETCH_OFFSET(k,s) ((k) << (PREFETCH_LEVEL + 1)) +#endif + +#if 1 && PREFETCH_LEVEL <= 3 && defined(USE_PREFETCH_FOR_ALIGNED_ARRAY) + #define PREFETCH_ADD_OFFSET 0 +#else + // last offset that can be required in PREFETCH_LEVEL step: + #define PREFETCH_RANGE ((2 << PREFETCH_LEVEL) - 1) + #define PREFETCH_ADD_OFFSET PREFETCH_RANGE / 2 +#endif + +#if PREFETCH_LEVEL <= 3 + +#ifdef USE_PREFETCH_FOR_ALIGNED_ARRAY + #define SORT_PREFETCH(p,k,s,size) \ + { const size_t s2 = PREFETCH_OFFSET(k,s) + PREFETCH_ADD_OFFSET; \ + if (s2 <= size) { \ + Z7_PREFETCH((p + s2)); \ + }} +#else /* for unaligned array */ + #define SORT_PREFETCH(p,k,s,size) \ + { const size_t s2 = PREFETCH_OFFSET(k,s) + PREFETCH_RANGE; \ + if (s2 <= size) { \ + Z7_PREFETCH((p + s2 - PREFETCH_RANGE)); \ + Z7_PREFETCH((p + s2)); \ + }} +#endif + +#else // PREFETCH_LEVEL > 3 + +#ifdef USE_PREFETCH_FOR_ALIGNED_ARRAY + #define SORT_PREFETCH(p,k,s,size) \ + { const size_t s2 = PREFETCH_OFFSET(k,s) + PREFETCH_RANGE - 16 / 2; \ + if (s2 <= size) { \ + Z7_PREFETCH((p + s2 - 16)); \ + Z7_PREFETCH((p + s2)); \ + }} +#else /* for unaligned array */ + #define SORT_PREFETCH(p,k,s,size) \ + { const size_t s2 = PREFETCH_OFFSET(k,s) + PREFETCH_RANGE; \ + if (s2 <= size) { \ + Z7_PREFETCH((p + s2 - PREFETCH_RANGE)); \ + Z7_PREFETCH((p + s2 - PREFETCH_RANGE / 2)); \ + Z7_PREFETCH((p + s2)); \ + }} +#endif + +#endif // PREFETCH_LEVEL > 3 +#endif // PREFETCH_LEVEL != 0 +#endif // Z7_PREFETCH + + +#if defined(MY_CPU_ARM64) \ + /* || defined(MY_CPU_AMD64) */ \ + /* || defined(MY_CPU_ARM) && !defined(_MSC_VER) */ + // we want to use cmov, if cmov is very fast: + // - this cmov version is slower for clang-x64. + // - this cmov version is faster for gcc-arm64 for some fast arm64 cpus.
+ #define Z7_FAST_CMOV_SUPPORTED +#endif + +#ifdef Z7_FAST_CMOV_SUPPORTED + // we want to use cmov here, if cmov is fast: new arm64 cpus. + // we want the compiler to use conditional move for this branch + #define GET_MAX_VAL(n0, n1, max_val_slow) if (n0 < n1) n0 = n1; +#else + // use this branch, if cpu doesn't support fast conditional move. + // it uses slow array access reading: + #define GET_MAX_VAL(n0, n1, max_val_slow) n0 = max_val_slow; +#endif + +#define HeapSortDown(p, k, size, temp, macro_prefetch) \ +{ \ + for (;;) { \ + UInt32 n0, n1; \ + size_t s = k * 2; \ + if (s >= size) { \ + if (s == size) { \ + n0 = p[s]; \ + p[k] = n0; \ + if (temp < n0) k = s; \ + } \ + break; \ + } \ + n0 = p[k * 2]; \ + n1 = p[k * 2 + 1]; \ + s += n0 < n1; \ + GET_MAX_VAL(n0, n1, p[s]) \ + if (temp >= n0) break; \ + macro_prefetch(p, k, s, size) \ + p[k] = n0; \ + k = s; \ + } \ + p[k] = temp; \ +} + + +/* +stage-1 : O(n) : + we generate intermediate partially sorted binary tree: + p[0] : it's additional item for better alignment of tree structure in memory. + p[1] + p[2] p[3] + p[4] p[5] p[6] p[7] + ... + p[x] >= p[x * 2] + p[x] >= p[x * 2 + 1] + +stage-2 : O(n)*log2(N): + we move largest item p[0] from head of tree to the end of array + and insert last item to sorted binary tree. +*/ + +// (p) must be aligned for cache line size (64-bytes) for best performance + +void Z7_FASTCALL HeapSort(UInt32 *p, size_t size) { - if (size <= 1) + if (size < 2) return; - p--; + if (size == 2) { - size_t i = size / 2; + const UInt32 a0 = p[0]; + const UInt32 a1 = p[1]; + const unsigned k = a1 < a0; + p[k] = a0; + p[k ^ 1] = a1; + return; + } + { + // stage-1 : O(n) + // we transform array to partially sorted binary tree. 
+ size_t i = --size / 2; + // (size) now is the index of the last item in tree, + // if (i) + { + do + { + const UInt32 temp = p[i]; + size_t k = i; + HeapSortDown(p, k, size, temp, PREFETCH_NO) + } + while (--i); + } + { + const UInt32 temp = p[0]; + const UInt32 a1 = p[1]; + if (temp < a1) + { + size_t k = 1; + p[0] = a1; + HeapSortDown(p, k, size, temp, PREFETCH_NO) + } + } + } + + if (size < 3) + { + // size == 2 + const UInt32 a0 = p[0]; + p[0] = p[2]; + p[2] = a0; + return; + } + if (size != 3) + { + // stage-2 : O(size) * log2(size): + // we move largest item p[0] from head to the end of array, + // and insert last item to sorted binary tree. do { - UInt32 temp = p[i]; - HeapSortRefDown(p, vals, i, size, temp); + const UInt32 temp = p[size]; + size_t k = p[2] < p[3] ? 3 : 2; + p[size--] = p[0]; + p[0] = p[1]; + p[1] = p[k]; + HeapSortDown(p, k, size, temp, SORT_PREFETCH) // PREFETCH_NO } - while (--i != 0); + while (size != 3); } - do { - UInt32 temp = p[size]; - p[size--] = p[1]; - HeapSortRefDown(p, vals, 1, size, temp); + const UInt32 a2 = p[2]; + const UInt32 a3 = p[3]; + const size_t k = a2 < a3; + p[2] = p[1]; + p[3] = p[0]; + p[k] = a3; + p[k ^ 1] = a2; } - while (size > 1); } -*/ diff --git a/C/Sort.h b/C/Sort.h index 1bb2b1e..ef8c0b3 100644 --- a/C/Sort.h +++ b/C/Sort.h @@ -1,5 +1,5 @@ /* Sort.h -- Sort functions -2023-03-05 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_SORT_H #define ZIP7_INC_SORT_H @@ -8,10 +8,7 @@ EXTERN_C_BEGIN -void HeapSort(UInt32 *p, size_t size); -void HeapSort64(UInt64 *p, size_t size); - -/* void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size); */ +void Z7_FASTCALL HeapSort(UInt32 *p, size_t size); EXTERN_C_END diff --git a/C/Threads.c b/C/Threads.c index a04c005..2ce49c8 100644 --- a/C/Threads.c +++ b/C/Threads.c @@ -1,5 +1,5 @@ /* Threads.c -- multithreading library -2024-03-28 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -59,6 +59,100 
@@ WRes Thread_Wait_Close(CThread *p) return (res != 0 ? res : res2); } +typedef struct MY_PROCESSOR_NUMBER { + WORD Group; + BYTE Number; + BYTE Reserved; +} MY_PROCESSOR_NUMBER, *MY_PPROCESSOR_NUMBER; + +typedef struct MY_GROUP_AFFINITY { +#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION < 100000) + // KAFFINITY is not defined in old mingw + ULONG_PTR +#else + KAFFINITY +#endif + Mask; + WORD Group; + WORD Reserved[3]; +} MY_GROUP_AFFINITY, *MY_PGROUP_AFFINITY; + +typedef BOOL (WINAPI *Func_SetThreadGroupAffinity)( + HANDLE hThread, + CONST MY_GROUP_AFFINITY *GroupAffinity, + MY_PGROUP_AFFINITY PreviousGroupAffinity); + +typedef BOOL (WINAPI *Func_GetThreadGroupAffinity)( + HANDLE hThread, + MY_PGROUP_AFFINITY GroupAffinity); + +typedef BOOL (WINAPI *Func_GetProcessGroupAffinity)( + HANDLE hProcess, + PUSHORT GroupCount, + PUSHORT GroupArray); + +Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION + +#if 0 +#include <stdio.h> +#define PRF(x) x +/* +-- + before call of SetThreadGroupAffinity() + GetProcessGroupAffinity return one group. + after call of SetThreadGroupAffinity(): + GetProcessGroupAffinity return more than one group, + if SetThreadGroupAffinity() was to another group. +-- + GetProcessAffinityMask MS DOCs: + { + If the calling process contains threads in multiple groups, + the function returns zero for both affinity masks. + } + but tests in win10 with 2 groups (less than 64 cores total): + GetProcessAffinityMask() still returns non-zero affinity masks + even after SetThreadGroupAffinity() calls.
+*/ +static void PrintProcess_Info() +{ + { + const + Func_GetProcessGroupAffinity fn_GetProcessGroupAffinity = + (Func_GetProcessGroupAffinity) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), + "GetProcessGroupAffinity"); + if (fn_GetProcessGroupAffinity) + { + unsigned i; + USHORT GroupCounts[64]; + USHORT GroupCount = Z7_ARRAY_SIZE(GroupCounts); + BOOL boolRes = fn_GetProcessGroupAffinity(GetCurrentProcess(), + &GroupCount, GroupCounts); + printf("\n====== GetProcessGroupAffinity : " + "boolRes=%u GroupCounts = %u :", + boolRes, (unsigned)GroupCount); + for (i = 0; i < GroupCount; i++) + printf(" %u", GroupCounts[i]); + printf("\n"); + } + } + { + DWORD_PTR processAffinityMask, systemAffinityMask; + if (GetProcessAffinityMask(GetCurrentProcess(), &processAffinityMask, &systemAffinityMask)) + { + PRF(printf("\n====== GetProcessAffinityMask : " + ": processAffinityMask=%x, systemAffinityMask=%x\n", + (UInt32)processAffinityMask, (UInt32)systemAffinityMask);) + } + else + printf("\n==GetProcessAffinityMask FAIL"); + } +} +#else +#ifndef USE_THREADS_CreateThread +// #define PRF(x) +#endif +#endif + WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) { /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ @@ -72,7 +166,43 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) unsigned threadId; *p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId)); - + +#if 0 // 1 : for debug + { + DWORD_PTR prevMask; + DWORD_PTR affinity = 1 << 0; + prevMask = SetThreadAffinityMask(*p, (DWORD_PTR)affinity); + prevMask = prevMask; + } +#endif +#if 0 // 1 : for debug + { + /* win10: new thread will be created in same group that is assigned to parent thread + but affinity mask will contain all allowed threads of that group, + even if affinity mask of parent group is not full + win11: what group it will be created, if we have set + affinity of parent thread with 
ThreadGroupAffinity? + */ + const + Func_GetThreadGroupAffinity fn = + (Func_GetThreadGroupAffinity) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), + "GetThreadGroupAffinity"); + if (fn) + { + // BOOL wres2; + MY_GROUP_AFFINITY groupAffinity; + memset(&groupAffinity, 0, sizeof(groupAffinity)); + /* wres2 = */ fn(*p, &groupAffinity); + PRF(printf("\n==Thread_Create cur = %6u GetThreadGroupAffinity(): " + "wres2_BOOL = %u, group=%u mask=%x\n", + GetCurrentThreadId(), + wres2, + groupAffinity.Group, + (UInt32)groupAffinity.Mask);) + } + } +#endif + #endif /* maybe we must use errno here, but probably GetLastError() is also OK. */ @@ -110,7 +240,84 @@ WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param */ } { - DWORD prevSuspendCount = ResumeThread(h); + const DWORD prevSuspendCount = ResumeThread(h); + /* ResumeThread() returns: + 0 : was_not_suspended + 1 : was_resumed + -1 : error + */ + if (prevSuspendCount == (DWORD)-1) + wres = GetError(); + } + } + + /* maybe we must use errno here, but probably GetLastError() is also OK. 
*/ + return wres; + + #endif +} + + +WRes Thread_Create_With_Group(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, unsigned group, CAffinityMask affinityMask) +{ +#ifdef USE_THREADS_CreateThread + + UNUSED_VAR(group) + UNUSED_VAR(affinityMask) + return Thread_Create(p, func, param); + +#else + + /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ + HANDLE h; + WRes wres; + unsigned threadId; + h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId)); + *p = h; + wres = HandleToWRes(h); + if (h) + { + // PrintProcess_Info(); + { + const + Func_SetThreadGroupAffinity fn = + (Func_SetThreadGroupAffinity) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), + "SetThreadGroupAffinity"); + if (fn) + { + // WRes wres2; + MY_GROUP_AFFINITY groupAffinity, prev_groupAffinity; + memset(&groupAffinity, 0, sizeof(groupAffinity)); + // groupAffinity.Mask must use only bits that supported by current group + // (groupAffinity.Mask = 0) means all allowed bits + groupAffinity.Mask = affinityMask; + groupAffinity.Group = (WORD)group; + // wres2 = + fn(h, &groupAffinity, &prev_groupAffinity); + /* + if (groupAffinity.Group == prev_groupAffinity.Group) + wres2 = wres2; + else + wres2 = wres2; + if (wres2 == 0) + { + wres2 = GetError(); + PRF(printf("\n==SetThreadGroupAffinity error: %u\n", wres2);) + } + else + { + PRF(printf("\n==Thread_Create_With_Group::SetThreadGroupAffinity()" + " threadId = %6u" + " group=%u mask=%x\n", + threadId, + prev_groupAffinity.Group, + (UInt32)prev_groupAffinity.Mask);) + } + */ + } + } + { + const DWORD prevSuspendCount = ResumeThread(h); /* ResumeThread() returns: 0 : was_not_suspended 1 : was_resumed @@ -297,6 +504,13 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) return Thread_Create_With_CpuSet(p, func, param, NULL); } +/* +WRes Thread_Create_With_Group(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, unsigned group, CAffinityMask 
affinity) +{ + UNUSED_VAR(group) + return Thread_Create_With_Affinity(p, func, param, affinity); +} +*/ WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity) { @@ -577,5 +791,22 @@ WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p) return AutoResetEvent_CreateNotSignaled(p); } +void ThreadNextGroup_Init(CThreadNextGroup *p, UInt32 numGroups, UInt32 startGroup) +{ + // printf("\n====== ThreadNextGroup_Init numGroups = %x: startGroup=%x\n", numGroups, startGroup); + if (numGroups == 0) + numGroups = 1; + p->NumGroups = numGroups; + p->NextGroup = startGroup % numGroups; +} + + +UInt32 ThreadNextGroup_GetNext(CThreadNextGroup *p) +{ + const UInt32 next = p->NextGroup; + p->NextGroup = (next + 1) % p->NumGroups; + return next; +} + #undef PRF #undef Print diff --git a/C/Threads.h b/C/Threads.h index 2428e88..e788e32 100644 --- a/C/Threads.h +++ b/C/Threads.h @@ -1,5 +1,5 @@ /* Threads.h -- multithreading library -2024-03-28 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_THREADS_H #define ZIP7_INC_THREADS_H @@ -140,12 +140,22 @@ WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param WRes Thread_Wait_Close(CThread *p); #ifdef _WIN32 +WRes Thread_Create_With_Group(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, unsigned group, CAffinityMask affinityMask); #define Thread_Create_With_CpuSet(p, func, param, cs) \ Thread_Create_With_Affinity(p, func, param, *cs) #else WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet); #endif +typedef struct +{ + unsigned NumGroups; + unsigned NextGroup; +} CThreadNextGroup; + +void ThreadNextGroup_Init(CThreadNextGroup *p, unsigned numGroups, unsigned startGroup); +unsigned ThreadNextGroup_GetNext(CThreadNextGroup *p); + #ifdef _WIN32 diff --git a/C/Util/Lzma/LzmaUtil.dsp b/C/Util/Lzma/LzmaUtil.dsp index 0815eb5..076e11c 100644 --- a/C/Util/Lzma/LzmaUtil.dsp +++ 
b/C/Util/Lzma/LzmaUtil.dsp @@ -122,6 +122,10 @@ SOURCE=..\..\Compiler.h # End Source File # Begin Source File +SOURCE=..\..\CpuArch.c +# End Source File +# Begin Source File + SOURCE=..\..\CpuArch.h # End Source File # Begin Source File diff --git a/C/Util/LzmaLib/LzmaLib.dsp b/C/Util/LzmaLib/LzmaLib.dsp index 431c024..af1ddbf 100644 --- a/C/Util/LzmaLib/LzmaLib.dsp +++ b/C/Util/LzmaLib/LzmaLib.dsp @@ -43,7 +43,7 @@ RSC=rc.exe # PROP Ignore_Export_Lib 0 # PROP Target_Dir "" # ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LZMALIB_EXPORTS" /YX /FD /c -# ADD CPP /nologo /Gr /MT /W3 /O2 /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LZMALIB_EXPORTS" /FD /c +# ADD CPP /nologo /Gr /MT /W4 /WX /O2 /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LZMALIB_EXPORTS" /FD /c # SUBTRACT CPP /YX # ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32 # ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32 @@ -71,7 +71,7 @@ LINK32=link.exe # PROP Ignore_Export_Lib 0 # PROP Target_Dir "" # ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LZMALIB_EXPORTS" /YX /FD /GZ /c -# ADD CPP /nologo /MTd /W3 /Gm /ZI /Od /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LZMALIB_EXPORTS" /D "COMPRESS_MF_MT" /FD /GZ /c +# ADD CPP /nologo /MTd /W4 /WX /Gm /ZI /Od /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LZMALIB_EXPORTS" /D "COMPRESS_MF_MT" /FD /GZ /c # SUBTRACT CPP /YX # ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32 # ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32 @@ -128,6 +128,10 @@ SOURCE=..\..\Compiler.h # End Source File # Begin Source File +SOURCE=..\..\CpuArch.c +# End Source File +# Begin Source File + SOURCE=..\..\CpuArch.h # End Source File # Begin Source File diff --git a/C/Xz.h b/C/Xz.h index 34502e9..5fa8d63 100644 --- a/C/Xz.h +++ b/C/Xz.h @@ -1,5 +1,5 @@ /* Xz.h - Xz interface -2024-01-26 : 
Igor Pavlov : Public domain */ +Igor Pavlov : Public domain */ #ifndef ZIP7_INC_XZ_H #define ZIP7_INC_XZ_H @@ -121,6 +121,7 @@ typedef struct UInt64 startOffset; } CXzStream; +#define Xz_CONSTRUCT(p) { (p)->numBlocks = 0; (p)->blocks = NULL; (p)->flags = 0; } void Xz_Construct(CXzStream *p); void Xz_Free(CXzStream *p, ISzAllocPtr alloc); @@ -136,8 +137,13 @@ typedef struct CXzStream *streams; } CXzs; +#define Xzs_CONSTRUCT(p) { (p)->num = 0; (p)->numAllocated = 0; (p)->streams = NULL; } void Xzs_Construct(CXzs *p); void Xzs_Free(CXzs *p, ISzAllocPtr alloc); +/* +Xzs_ReadBackward() must be called for empty CXzs object. +Xzs_ReadBackward() can return non empty object with (p->num != 0) even in case of error. +*/ SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr inStream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc); UInt64 Xzs_GetNumBlocks(const CXzs *p); @@ -268,8 +274,8 @@ typedef struct size_t outBufSize; size_t outDataWritten; // the size of data in (outBuf) that were fully unpacked - Byte shaDigest[SHA256_DIGEST_SIZE]; - Byte buf[XZ_BLOCK_HEADER_SIZE_MAX]; + UInt32 shaDigest32[SHA256_DIGEST_SIZE / 4]; + Byte buf[XZ_BLOCK_HEADER_SIZE_MAX]; // it must be aligned for 4-bytes } CXzUnpacker; /* alloc : aligned for cache line allocation is better */ diff --git a/C/XzCrc64Opt.c b/C/XzCrc64Opt.c index c855dd5..08a419c 100644 --- a/C/XzCrc64Opt.c +++ b/C/XzCrc64Opt.c @@ -1,5 +1,5 @@ /* XzCrc64Opt.c -- CRC64 calculation (optimized functions) -2023-12-08 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -235,7 +235,7 @@ CRC64_FUNC_PRE_BE(Z7_CRC64_NUM_TABLES_USE) v = Q32BE(1, w1) ^ Q32BE(0, w0); v ^= Q32BE(3, d1) ^ Q32BE(2, d0); #endif -#elif +#else #error Stop_Compiling_Bad_CRC64_NUM_TABLES #endif p += Z7_CRC64_NUM_TABLES_USE; diff --git a/C/XzDec.c b/C/XzDec.c index f4160ea..fb5f4d3 100644 --- a/C/XzDec.c +++ b/C/XzDec.c @@ -1,5 +1,5 @@ /* XzDec.c -- Xz Decode -2024-03-01 : Igor Pavlov : Public domain */ +: 
Igor Pavlov : Public domain */ #include "Precomp.h" @@ -59,7 +59,7 @@ unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value) for (i = 0; i < limit;) { - Byte b = p[i]; + const unsigned b = p[i]; *value |= (UInt64)(b & 0x7F) << (7 * i++); if ((b & 0x80) == 0) return (b == 0 && i != 1) ? 0 : i; @@ -796,11 +796,10 @@ SRes Xz_ParseHeader(CXzStreamFlags *p, const Byte *buf) static BoolInt Xz_CheckFooter(CXzStreamFlags flags, UInt64 indexSize, const Byte *buf) { - return indexSize == (((UInt64)GetUi32(buf + 4) + 1) << 2) - && GetUi32(buf) == CrcCalc(buf + 4, 6) - && flags == GetBe16(buf + 8) - && buf[10] == XZ_FOOTER_SIG_0 - && buf[11] == XZ_FOOTER_SIG_1; + return indexSize == (((UInt64)GetUi32a(buf + 4) + 1) << 2) + && GetUi32a(buf) == CrcCalc(buf + 4, 6) + && flags == GetBe16a(buf + 8) + && GetUi16a(buf + 10) == (XZ_FOOTER_SIG_0 | (XZ_FOOTER_SIG_1 << 8)); } #define READ_VARINT_AND_CHECK(buf, pos, size, res) \ @@ -1166,7 +1165,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, p->indexPreSize = 1 + Xz_WriteVarInt(p->buf + 1, p->numBlocks); p->indexPos = p->indexPreSize; p->indexSize += p->indexPreSize; - Sha256_Final(&p->sha, p->shaDigest); + Sha256_Final(&p->sha, (Byte *)(void *)p->shaDigest32); Sha256_Init(&p->sha); p->crc = CrcUpdate(CRC_INIT_VAL, p->buf, p->indexPreSize); p->state = XZ_STATE_STREAM_INDEX; @@ -1241,10 +1240,10 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, break; } { - Byte digest[XZ_CHECK_SIZE_MAX]; + UInt32 digest32[XZ_CHECK_SIZE_MAX / 4]; p->state = XZ_STATE_BLOCK_HEADER; p->pos = 0; - if (XzCheck_Final(&p->check, digest) && memcmp(digest, p->buf, checkSize) != 0) + if (XzCheck_Final(&p->check, (void *)digest32) && memcmp(digest32, p->buf, checkSize) != 0) return SZ_ERROR_CRC; if (p->decodeOnlyOneBlock) { @@ -1289,12 +1288,12 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, } else { - Byte digest[SHA256_DIGEST_SIZE]; + UInt32 digest32[SHA256_DIGEST_SIZE / 4]; p->state = 
XZ_STATE_STREAM_INDEX_CRC; p->indexSize += 4; p->pos = 0; - Sha256_Final(&p->sha, digest); - if (memcmp(digest, p->shaDigest, SHA256_DIGEST_SIZE) != 0) + Sha256_Final(&p->sha, (void *)digest32); + if (memcmp(digest32, p->shaDigest32, SHA256_DIGEST_SIZE) != 0) return SZ_ERROR_CRC; } } @@ -1313,7 +1312,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, const Byte *ptr = p->buf; p->state = XZ_STATE_STREAM_FOOTER; p->pos = 0; - if (CRC_GET_DIGEST(p->crc) != GetUi32(ptr)) + if (CRC_GET_DIGEST(p->crc) != GetUi32a(ptr)) return SZ_ERROR_CRC; } break; @@ -1343,7 +1342,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, { if (*src != 0) { - if (((UInt32)p->padSize & 3) != 0) + if ((unsigned)p->padSize & 3) return SZ_ERROR_NO_ARCHIVE; p->pos = 0; p->state = XZ_STATE_STREAM_HEADER; diff --git a/C/XzEnc.c b/C/XzEnc.c index 3bfa51c..818588c 100644 --- a/C/XzEnc.c +++ b/C/XzEnc.c @@ -1,5 +1,5 @@ /* XzEnc.c -- Xz Encode -2024-03-01 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -411,6 +411,7 @@ static SRes SeqInFilter_Read(ISeqInStreamPtr pp, void *data, size_t *size) } } +Z7_FORCE_INLINE static void SeqInFilter_Construct(CSeqInFilter *p) { p->buf = NULL; @@ -418,6 +419,7 @@ static void SeqInFilter_Construct(CSeqInFilter *p) p->vt.Read = SeqInFilter_Read; } +Z7_FORCE_INLINE static void SeqInFilter_Free(CSeqInFilter *p, ISzAllocPtr alloc) { if (p->StateCoder.p) @@ -507,6 +509,7 @@ void XzFilterProps_Init(CXzFilterProps *p) void XzProps_Init(CXzProps *p) { p->checkId = XZ_CHECK_CRC32; + p->numThreadGroups = 0; p->blockSize = XZ_PROPS_BLOCK_SIZE_AUTO; p->numBlockThreads_Reduced = -1; p->numBlockThreads_Max = -1; @@ -689,6 +692,7 @@ typedef struct } CLzma2WithFilters; +Z7_FORCE_INLINE static void Lzma2WithFilters_Construct(CLzma2WithFilters *p) { p->lzma2 = NULL; @@ -712,6 +716,7 @@ static SRes Lzma2WithFilters_Create(CLzma2WithFilters *p, ISzAllocPtr alloc, ISz } +Z7_FORCE_INLINE static void 
Lzma2WithFilters_Free(CLzma2WithFilters *p, ISzAllocPtr alloc) { #ifdef USE_SUBBLOCK @@ -1236,6 +1241,7 @@ SRes XzEnc_Encode(CXzEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr in } p->mtCoder.numThreadsMax = (unsigned)props->numBlockThreads_Max; + p->mtCoder.numThreadGroups = props->numThreadGroups; p->mtCoder.expectedDataSize = p->expectedDataSize; RINOK(MtCoder_Code(&p->mtCoder)) diff --git a/C/XzEnc.h b/C/XzEnc.h index 31026f7..36898bb 100644 --- a/C/XzEnc.h +++ b/C/XzEnc.h @@ -1,5 +1,5 @@ /* XzEnc.h -- Xz Encode -2023-04-13 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_XZ_ENC_H #define ZIP7_INC_XZ_ENC_H @@ -31,6 +31,7 @@ typedef struct CLzma2EncProps lzma2Props; CXzFilterProps filterProps; unsigned checkId; + unsigned numThreadGroups; // 0 : no groups UInt64 blockSize; int numBlockThreads_Reduced; int numBlockThreads_Max; diff --git a/C/XzIn.c b/C/XzIn.c index 49470a7..d8768dc 100644 --- a/C/XzIn.c +++ b/C/XzIn.c @@ -1,38 +1,39 @@ /* XzIn.c - Xz input -2023-09-07 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" #include <string.h> #include "7zCrc.h" -#include "CpuArch.h" #include "Xz.h" +#include "CpuArch.h" -/* -#define XZ_FOOTER_SIG_CHECK(p) (memcmp((p), XZ_FOOTER_SIG, XZ_FOOTER_SIG_SIZE) == 0) -*/ -#define XZ_FOOTER_SIG_CHECK(p) ((p)[0] == XZ_FOOTER_SIG_0 && (p)[1] == XZ_FOOTER_SIG_1) - +#define XZ_FOOTER_12B_ALIGNED16_SIG_CHECK(p) \ + (GetUi16a((const Byte *)(const void *)(p) + 10) == \ + (XZ_FOOTER_SIG_0 | (XZ_FOOTER_SIG_1 << 8))) SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream) { - Byte sig[XZ_STREAM_HEADER_SIZE]; + UInt32 data32[XZ_STREAM_HEADER_SIZE / 4]; size_t processedSize = XZ_STREAM_HEADER_SIZE; - RINOK(SeqInStream_ReadMax(inStream, sig, &processedSize)) + RINOK(SeqInStream_ReadMax(inStream, data32, &processedSize)) if (processedSize != XZ_STREAM_HEADER_SIZE - || memcmp(sig, XZ_SIG, XZ_SIG_SIZE) != 0) + || memcmp(data32, XZ_SIG, XZ_SIG_SIZE) != 0) return
SZ_ERROR_NO_ARCHIVE; - return Xz_ParseHeader(p, sig); + return Xz_ParseHeader(p, (const Byte *)(const void *)data32); } -#define READ_VARINT_AND_CHECK(buf, pos, size, res) \ - { const unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \ +#define READ_VARINT_AND_CHECK(buf, size, res) \ +{ const unsigned s = Xz_ReadVarInt(buf, size, res); \ if (s == 0) return SZ_ERROR_ARCHIVE; \ - pos += s; } + size -= s; \ + buf += s; \ +} SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, UInt32 *headerSizeRes) { + MY_ALIGN(4) Byte header[XZ_BLOCK_HEADER_SIZE_MAX]; unsigned headerSize; *headerSizeRes = 0; @@ -57,8 +58,12 @@ SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, return XzBlock_Parse(p, header); } + #define ADD_SIZE_CHECK(size, val) \ - { const UInt64 newSize = size + (val); if (newSize < size) return XZ_SIZE_OVERFLOW; size = newSize; } +{ const UInt64 newSize = size + (val); \ + if (newSize < size) return XZ_SIZE_OVERFLOW; \ + size = newSize; \ +} UInt64 Xz_GetUnpackSize(const CXzStream *p) { @@ -82,76 +87,85 @@ UInt64 Xz_GetPackSize(const CXzStream *p) return size; } -/* -SRes XzBlock_ReadFooter(CXzBlock *p, CXzStreamFlags f, ISeqInStreamPtr inStream) -{ - return SeqInStream_Read(inStream, p->check, XzFlags_GetCheckSize(f)); -} -*/ -static SRes Xz_ReadIndex2(CXzStream *p, const Byte *buf, size_t size, ISzAllocPtr alloc) +// input; +// CXzStream (p) is empty object. +// size != 0 +// (size & 3) == 0 +// (buf) is aligned for at least 4 bytes. +// output: +// p->numBlocks is number of allocated items in p->blocks +// p->blocks[*] values must be ignored, if function returns error. 
+static SRes Xz_ParseIndex(CXzStream *p, const Byte *buf, size_t size, ISzAllocPtr alloc) { - size_t numBlocks, pos = 1; - UInt32 crc; - + size_t numBlocks; if (size < 5 || buf[0] != 0) return SZ_ERROR_ARCHIVE; - size -= 4; - crc = CrcCalc(buf, size); - if (crc != GetUi32(buf + size)) - return SZ_ERROR_ARCHIVE; - + { + const UInt32 crc = CrcCalc(buf, size); + if (crc != GetUi32a(buf + size)) + return SZ_ERROR_ARCHIVE; + } + buf++; + size--; { UInt64 numBlocks64; - READ_VARINT_AND_CHECK(buf, pos, size, &numBlocks64) + READ_VARINT_AND_CHECK(buf, size, &numBlocks64) + // (numBlocks64) is 63-bit value, so we can calculate (numBlocks64 * 2): + if (numBlocks64 * 2 > size) + return SZ_ERROR_ARCHIVE; + if (numBlocks64 >= ((size_t)1 << (sizeof(size_t) * 8 - 1)) / sizeof(CXzBlockSizes)) + return SZ_ERROR_MEM; // SZ_ERROR_ARCHIVE numBlocks = (size_t)numBlocks64; - if (numBlocks != numBlocks64 || numBlocks * 2 > size) - return SZ_ERROR_ARCHIVE; } - - Xz_Free(p, alloc); - if (numBlocks != 0) + // Xz_Free(p, alloc); // it's optional, because (p) is empty already + if (numBlocks) { - size_t i; - p->numBlocks = numBlocks; - p->blocks = (CXzBlockSizes *)ISzAlloc_Alloc(alloc, sizeof(CXzBlockSizes) * numBlocks); - if (!p->blocks) + CXzBlockSizes *blocks = (CXzBlockSizes *)ISzAlloc_Alloc(alloc, sizeof(CXzBlockSizes) * numBlocks); + if (!blocks) return SZ_ERROR_MEM; - for (i = 0; i < numBlocks; i++) + p->blocks = blocks; + p->numBlocks = numBlocks; + // the caller will call Xz_Free() in case of error + do { - CXzBlockSizes *block = &p->blocks[i]; - READ_VARINT_AND_CHECK(buf, pos, size, &block->totalSize) - READ_VARINT_AND_CHECK(buf, pos, size, &block->unpackSize) - if (block->totalSize == 0) + READ_VARINT_AND_CHECK(buf, size, &blocks->totalSize) + READ_VARINT_AND_CHECK(buf, size, &blocks->unpackSize) + if (blocks->totalSize == 0) return SZ_ERROR_ARCHIVE; + blocks++; } + while (--numBlocks); } - while ((pos & 3) != 0) - if (buf[pos++] != 0) + if (size >= 4) + return SZ_ERROR_ARCHIVE; + 
while (size) + if (buf[--size]) return SZ_ERROR_ARCHIVE; - return (pos == size) ? SZ_OK : SZ_ERROR_ARCHIVE; + return SZ_OK; } + +/* static SRes Xz_ReadIndex(CXzStream *p, ILookInStreamPtr stream, UInt64 indexSize, ISzAllocPtr alloc) { SRes res; size_t size; Byte *buf; - if (indexSize > ((UInt32)1 << 31)) - return SZ_ERROR_UNSUPPORTED; + if (indexSize >= ((size_t)1 << (sizeof(size_t) * 8 - 1))) + return SZ_ERROR_MEM; // SZ_ERROR_ARCHIVE size = (size_t)indexSize; - if (size != indexSize) - return SZ_ERROR_UNSUPPORTED; buf = (Byte *)ISzAlloc_Alloc(alloc, size); if (!buf) return SZ_ERROR_MEM; res = LookInStream_Read2(stream, buf, size, SZ_ERROR_UNSUPPORTED); if (res == SZ_OK) - res = Xz_ReadIndex2(p, buf, size, alloc); + res = Xz_ParseIndex(p, buf, size, alloc); ISzAlloc_Free(alloc, buf); return res; } +*/ static SRes LookInStream_SeekRead_ForArc(ILookInStreamPtr stream, UInt64 offset, void *buf, size_t size) { @@ -160,84 +174,102 @@ static SRes LookInStream_SeekRead_ForArc(ILookInStreamPtr stream, UInt64 offset, /* return LookInStream_Read2(stream, buf, size, SZ_ERROR_NO_ARCHIVE); */ } + +/* +in: + (*startOffset) is position in (stream) where xz_stream must be finished. +out: + if returns SZ_OK, then (*startOffset) is position in stream that shows start of xz_stream. 
+*/ static SRes Xz_ReadBackward(CXzStream *p, ILookInStreamPtr stream, Int64 *startOffset, ISzAllocPtr alloc) { - UInt64 indexSize; - Byte buf[XZ_STREAM_FOOTER_SIZE]; + #define TEMP_BUF_SIZE (1 << 10) + UInt32 buf32[TEMP_BUF_SIZE / 4]; UInt64 pos = (UInt64)*startOffset; - if ((pos & 3) != 0 || pos < XZ_STREAM_FOOTER_SIZE) + if ((pos & 3) || pos < XZ_STREAM_FOOTER_SIZE) return SZ_ERROR_NO_ARCHIVE; - pos -= XZ_STREAM_FOOTER_SIZE; - RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE)) + RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf32, XZ_STREAM_FOOTER_SIZE)) - if (!XZ_FOOTER_SIG_CHECK(buf + 10)) + if (!XZ_FOOTER_12B_ALIGNED16_SIG_CHECK(buf32)) { - UInt32 total = 0; pos += XZ_STREAM_FOOTER_SIZE; - for (;;) { - size_t i; - #define TEMP_BUF_SIZE (1 << 10) - Byte temp[TEMP_BUF_SIZE]; - - i = (pos > TEMP_BUF_SIZE) ? TEMP_BUF_SIZE : (size_t)pos; + // pos != 0 + // (pos & 3) == 0 + size_t i = pos >= TEMP_BUF_SIZE ? TEMP_BUF_SIZE : (size_t)pos; pos -= i; - RINOK(LookInStream_SeekRead_ForArc(stream, pos, temp, i)) - total += (UInt32)i; - for (; i != 0; i--) - if (temp[i - 1] != 0) + RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf32, i)) + i /= 4; + do + if (buf32[i - 1] != 0) break; - if (i != 0) - { - if ((i & 3) != 0) - return SZ_ERROR_NO_ARCHIVE; - pos += i; - break; - } - if (pos < XZ_STREAM_FOOTER_SIZE || total > (1 << 16)) + while (--i); + + pos += i * 4; + #define XZ_STREAM_BACKWARD_READING_PAD_MAX (1 << 16) + // here we don't support rare case with big padding for xz stream. + // so we have padding limit for backward reading. + if ((UInt64)*startOffset - pos > XZ_STREAM_BACKWARD_READING_PAD_MAX) return SZ_ERROR_NO_ARCHIVE; + if (i) + break; } - + // we try to open xz stream after skipping zero padding. + // ((UInt64)*startOffset == pos) is possible here! 
if (pos < XZ_STREAM_FOOTER_SIZE) return SZ_ERROR_NO_ARCHIVE; pos -= XZ_STREAM_FOOTER_SIZE; - RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE)) - if (!XZ_FOOTER_SIG_CHECK(buf + 10)) + RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf32, XZ_STREAM_FOOTER_SIZE)) + if (!XZ_FOOTER_12B_ALIGNED16_SIG_CHECK(buf32)) return SZ_ERROR_NO_ARCHIVE; } - p->flags = (CXzStreamFlags)GetBe16(buf + 8); - + p->flags = (CXzStreamFlags)GetBe16a(buf32 + 2); if (!XzFlags_IsSupported(p->flags)) return SZ_ERROR_UNSUPPORTED; - { /* to eliminate GCC 6.3 warning: dereferencing type-punned pointer will break strict-aliasing rules */ - const Byte *buf_ptr = buf; - if (GetUi32(buf_ptr) != CrcCalc(buf + 4, 6)) + const UInt32 *buf_ptr = buf32; + if (GetUi32a(buf_ptr) != CrcCalc(buf32 + 1, 6)) return SZ_ERROR_ARCHIVE; } - - indexSize = ((UInt64)GetUi32(buf + 4) + 1) << 2; - - if (pos < indexSize) - return SZ_ERROR_ARCHIVE; - - pos -= indexSize; - RINOK(LookInStream_SeekTo(stream, pos)) - RINOK(Xz_ReadIndex(p, stream, indexSize, alloc)) - { - UInt64 totalSize = Xz_GetPackSize(p); - if (totalSize == XZ_SIZE_OVERFLOW - || totalSize >= ((UInt64)1 << 63) - || pos < totalSize + XZ_STREAM_HEADER_SIZE) + const UInt64 indexSize = ((UInt64)GetUi32a(buf32 + 1) + 1) << 2; + if (pos < indexSize) return SZ_ERROR_ARCHIVE; - pos -= (totalSize + XZ_STREAM_HEADER_SIZE); + pos -= indexSize; + // v25.00: relaxed indexSize check. We allow big index table. 
+ // if (indexSize > ((UInt32)1 << 31)) + if (indexSize >= ((size_t)1 << (sizeof(size_t) * 8 - 1))) + return SZ_ERROR_MEM; // SZ_ERROR_ARCHIVE + RINOK(LookInStream_SeekTo(stream, pos)) + // RINOK(Xz_ReadIndex(p, stream, indexSize, alloc)) + { + SRes res; + const size_t size = (size_t)indexSize; + // if (size != indexSize) return SZ_ERROR_UNSUPPORTED; + Byte *buf = (Byte *)ISzAlloc_Alloc(alloc, size); + if (!buf) + return SZ_ERROR_MEM; + res = LookInStream_Read2(stream, buf, size, SZ_ERROR_UNSUPPORTED); + if (res == SZ_OK) + res = Xz_ParseIndex(p, buf, size, alloc); + ISzAlloc_Free(alloc, buf); + RINOK(res) + } + } + { + UInt64 total = Xz_GetPackSize(p); + if (total == XZ_SIZE_OVERFLOW || total >= ((UInt64)1 << 63)) + return SZ_ERROR_ARCHIVE; + total += XZ_STREAM_HEADER_SIZE; + if (pos < total) + return SZ_ERROR_ARCHIVE; + pos -= total; RINOK(LookInStream_SeekTo(stream, pos)) *startOffset = (Int64)pos; } @@ -246,7 +278,6 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStreamPtr stream, Int64 *startO CSecToRead secToRead; SecToRead_CreateVTable(&secToRead); secToRead.realStream = stream; - RINOK(Xz_ReadHeader(&headerFlags, &secToRead.vt)) return (p->flags == headerFlags) ? SZ_OK : SZ_ERROR_ARCHIVE; } @@ -257,8 +288,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStreamPtr stream, Int64 *startO void Xzs_Construct(CXzs *p) { - p->num = p->numAllocated = 0; - p->streams = 0; + Xzs_CONSTRUCT(p) } void Xzs_Free(CXzs *p, ISzAllocPtr alloc) @@ -268,7 +298,7 @@ void Xzs_Free(CXzs *p, ISzAllocPtr alloc) Xz_Free(&p->streams[i], alloc); ISzAlloc_Free(alloc, p->streams); p->num = p->numAllocated = 0; - p->streams = 0; + p->streams = NULL; } UInt64 Xzs_GetNumBlocks(const CXzs *p) @@ -307,34 +337,49 @@ UInt64 Xzs_GetPackSize(const CXzs *p) SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr stream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc) { Int64 endOffset = 0; + // it's supposed that CXzs object is empty here. 
+ // if CXzs object is not empty, it will add new streams to that non-empty object. + // Xzs_Free(p, alloc); // it's optional call to empty CXzs object. RINOK(ILookInStream_Seek(stream, &endOffset, SZ_SEEK_END)) *startOffset = endOffset; for (;;) { CXzStream st; SRes res; - Xz_Construct(&st); + Xz_CONSTRUCT(&st) res = Xz_ReadBackward(&st, stream, startOffset, alloc); + // if (res == SZ_OK), then (*startOffset) is start offset of new stream + // if (res != SZ_OK), then (*startOffset) is unchanged or it's expected start offset of stream with error st.startOffset = (UInt64)*startOffset; - RINOK(res) + // we must store (st) object to array, or we must free (st) local object. + if (res != SZ_OK) + { + Xz_Free(&st, alloc); + return res; + } if (p->num == p->numAllocated) { const size_t newNum = p->num + p->num / 4 + 1; void *data = ISzAlloc_Alloc(alloc, newNum * sizeof(CXzStream)); if (!data) + { + Xz_Free(&st, alloc); return SZ_ERROR_MEM; + } p->numAllocated = newNum; if (p->num != 0) memcpy(data, p->streams, p->num * sizeof(CXzStream)); ISzAlloc_Free(alloc, p->streams); p->streams = (CXzStream *)data; } + // we use direct copying of raw data from local variable (st) to object in array. 
+ // so we don't need to call Xz_Free(&st, alloc) after copying and after p->num++ p->streams[p->num++] = st; if (*startOffset == 0) - break; - RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset)) + return SZ_OK; + // seek operation is optional: + // RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset)) if (progress && ICompressProgress_Progress(progress, (UInt64)(endOffset - *startOffset), (UInt64)(Int64)-1) != SZ_OK) return SZ_ERROR_PROGRESS; } - return SZ_OK; } diff --git a/CPP/7zip/7zip_gcc.mak b/CPP/7zip/7zip_gcc.mak index fcb580a..8fbef14 100644 --- a/CPP/7zip/7zip_gcc.mak +++ b/CPP/7zip/7zip_gcc.mak @@ -1245,8 +1245,6 @@ $O/Sha512.o: ../../../../C/Sha512.c $(CC) $(CFLAGS) $< $O/Sha512Opt.o: ../../../../C/Sha512Opt.c $(CC) $(CFLAGS) $< -$O/Sort.o: ../../../../C/Sort.c - $(CC) $(CFLAGS) $< $O/SwapBytes.o: ../../../../C/SwapBytes.c $(CC) $(CFLAGS) $< $O/Xxh64.o: ../../../../C/Xxh64.c @@ -1285,6 +1283,8 @@ $O/Sha1Opt.o: ../../../../Asm/x86/Sha1Opt.asm $(MY_ASM) $(AFLAGS) $< $O/Sha256Opt.o: ../../../../Asm/x86/Sha256Opt.asm $(MY_ASM) $(AFLAGS) $< +$O/Sort.o: ../../../../Asm/x86/Sort.asm + $(MY_ASM) $(AFLAGS) $< ifndef USE_JWASM USE_X86_ASM_AES=1 @@ -1299,6 +1299,8 @@ $O/Sha1Opt.o: ../../../../C/Sha1Opt.c $(CC) $(CFLAGS) $< $O/Sha256Opt.o: ../../../../C/Sha256Opt.c $(CC) $(CFLAGS) $< +$O/Sort.o: ../../../../C/Sort.c + $(CC) $(CFLAGS) $< endif diff --git a/CPP/7zip/Archive/7z/7zCompressionMode.h b/CPP/7zip/Archive/7z/7zCompressionMode.h index 2979810..6874fd7 100644 --- a/CPP/7zip/Archive/7z/7zCompressionMode.h +++ b/CPP/7zip/Archive/7z/7zCompressionMode.h @@ -59,6 +59,7 @@ struct CCompressionMethodMode bool NumThreads_WasForced; bool MultiThreadMixer; UInt32 NumThreads; + UInt32 NumThreadGroups; #endif UString Password; // _Wipe @@ -74,6 +75,7 @@ struct CCompressionMethodMode , NumThreads_WasForced(false) , MultiThreadMixer(true) , NumThreads(1) + , NumThreadGroups(0) #endif , MemoryUsageLimit((UInt64)1 << 30) {} diff --git 
a/CPP/7zip/Archive/7z/7zHandlerOut.cpp b/CPP/7zip/Archive/7z/7zHandlerOut.cpp index 97476a5..ac264bc 100644 --- a/CPP/7zip/Archive/7z/7zHandlerOut.cpp +++ b/CPP/7zip/Archive/7z/7zHandlerOut.cpp @@ -111,8 +111,8 @@ HRESULT CHandler::SetMainMethod(CCompressionMethodMode &methodMode) } } - const UInt64 kSolidBytes_Min = (1 << 24); - const UInt64 kSolidBytes_Max = ((UInt64)1 << 32); + const UInt64 kSolidBytes_Min = 1 << 24; + const UInt64 kSolidBytes_Max = (UInt64)1 << 32; // for non-LZMA2 methods bool needSolid = false; @@ -122,22 +122,24 @@ HRESULT CHandler::SetMainMethod(CCompressionMethodMode &methodMode) SetGlobalLevelTo(oneMethodInfo); - #ifndef Z7_ST +#ifndef Z7_ST const bool numThreads_WasSpecifiedInMethod = (oneMethodInfo.Get_NumThreads() >= 0); if (!numThreads_WasSpecifiedInMethod) { // here we set the (NCoderPropID::kNumThreads) property in each method, only if there is no such property already CMultiMethodProps::SetMethodThreadsTo_IfNotFinded(oneMethodInfo, methodMode.NumThreads); } - #endif + if (methodMode.NumThreadGroups > 1) + CMultiMethodProps::Set_Method_NumThreadGroups_IfNotFinded(oneMethodInfo, methodMode.NumThreadGroups); +#endif CMethodFull &methodFull = methodMode.Methods.AddNew(); RINOK(PropsMethod_To_FullMethod(methodFull, oneMethodInfo)) - #ifndef Z7_ST +#ifndef Z7_ST methodFull.Set_NumThreads = true; methodFull.NumThreads = methodMode.NumThreads; - #endif +#endif if (methodFull.Id != k_Copy) needSolid = true; @@ -217,19 +219,18 @@ HRESULT CHandler::SetMainMethod(CCompressionMethodMode &methodMode) // here we get real chunkSize cs = oneMethodInfo.Get_Xz_BlockSize(); if (dicSize > cs) - dicSize = cs; + dicSize = cs; - const UInt64 kSolidBytes_Lzma2_Max = ((UInt64)1 << 34); + const UInt64 kSolidBytes_Lzma2_Max = (UInt64)1 << 34; if (numSolidBytes > kSolidBytes_Lzma2_Max) - numSolidBytes = kSolidBytes_Lzma2_Max; + numSolidBytes = kSolidBytes_Lzma2_Max; methodFull.Set_NumThreads = false; // we don't use ICompressSetCoderMt::SetNumberOfThreads() 
for LZMA2 encoder #ifndef Z7_ST if (!numThreads_WasSpecifiedInMethod && !methodMode.NumThreads_WasForced - && methodMode.MemoryUsageLimit_WasSet - ) + && methodMode.MemoryUsageLimit_WasSet) { const UInt32 lzmaThreads = oneMethodInfo.Get_Lzma_NumThreads(); const UInt32 numBlockThreads_Original = methodMode.NumThreads / lzmaThreads; @@ -273,14 +274,14 @@ HRESULT CHandler::SetMainMethod(CCompressionMethodMode &methodMode) { numSolidBytes = (UInt64)dicSize << 7; if (numSolidBytes > kSolidBytes_Max) - numSolidBytes = kSolidBytes_Max; + numSolidBytes = kSolidBytes_Max; } if (_numSolidBytesDefined) continue; if (numSolidBytes < kSolidBytes_Min) - numSolidBytes = kSolidBytes_Min; + numSolidBytes = kSolidBytes_Min; _numSolidBytes = numSolidBytes; _numSolidBytesDefined = true; } @@ -704,6 +705,9 @@ Z7_COM7F_IMF(CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt methodMode.NumThreads = numThreads; methodMode.NumThreads_WasForced = _numThreads_WasForced; methodMode.MultiThreadMixer = _useMultiThreadMixer; +#ifdef _WIN32 + methodMode.NumThreadGroups = _numThreadGroups; // _change it +#endif // headerMethod.NumThreads = 1; headerMethod.MultiThreadMixer = _useMultiThreadMixer; } diff --git a/CPP/7zip/Archive/Common/HandlerOut.cpp b/CPP/7zip/Archive/Common/HandlerOut.cpp index 22290b1..13d3beb 100644 --- a/CPP/7zip/Archive/Common/HandlerOut.cpp +++ b/CPP/7zip/Archive/Common/HandlerOut.cpp @@ -4,8 +4,6 @@ #include "../../../Common/StringToInt.h" -#include "../Common/ParseProperties.h" - #include "HandlerOut.h" namespace NArchive { @@ -82,6 +80,7 @@ bool ParseSizeString(const wchar_t *s, const PROPVARIANT &prop, UInt64 percentsB return true; } + bool CCommonMethodProps::SetCommonProperty(const UString &name, const PROPVARIANT &value, HRESULT &hres) { hres = S_OK; @@ -151,6 +150,11 @@ void CMultiMethodProps::SetMethodThreadsTo_Replace(CMethodProps &oneMethodInfo, SetMethodProp32_Replace(oneMethodInfo, NCoderPropID::kNumThreads, numThreads); } +void 
CMultiMethodProps::Set_Method_NumThreadGroups_IfNotFinded(CMethodProps &oneMethodInfo, UInt32 numThreadGroups) +{ + SetMethodProp32(oneMethodInfo, NCoderPropID::kNumThreadGroups, numThreadGroups); +} + #endif // Z7_ST diff --git a/CPP/7zip/Archive/Common/HandlerOut.h b/CPP/7zip/Archive/Common/HandlerOut.h index 31b7026..fc58305 100644 --- a/CPP/7zip/Archive/Common/HandlerOut.h +++ b/CPP/7zip/Archive/Common/HandlerOut.h @@ -17,11 +17,21 @@ protected: void InitCommon() { // _Write_MTime = true; - #ifndef Z7_ST - _numProcessors = _numThreads = NWindows::NSystem::GetNumberOfProcessors(); - _numThreads_WasForced = false; - #endif - + { +#ifndef Z7_ST + _numThreads_WasForced = false; + UInt32 numThreads; +#ifdef _WIN32 + NWindows::NSystem::CProcessAffinity aff; + numThreads = aff.Load_and_GetNumberOfThreads(); + _numThreadGroups = aff.IsGroupMode ? aff.Groups.GroupSizes.Size() : 0; +#else + numThreads = NWindows::NSystem::GetNumberOfProcessors(); +#endif // _WIN32 + _numProcessors = _numThreads = numThreads; +#endif // Z7_ST + } + size_t memAvail = (size_t)sizeof(size_t) << 28; _memAvail = memAvail; _memUsage_Compress = memAvail; @@ -46,11 +56,14 @@ protected: } public: - #ifndef Z7_ST +#ifndef Z7_ST UInt32 _numThreads; UInt32 _numProcessors; +#ifdef _WIN32 + UInt32 _numThreadGroups; +#endif bool _numThreads_WasForced; - #endif +#endif bool _memUsage_WasSet; UInt64 _memUsage_Compress; @@ -80,10 +93,12 @@ public: void SetGlobalLevelTo(COneMethodInfo &oneMethodInfo) const; - #ifndef Z7_ST +#ifndef Z7_ST static void SetMethodThreadsTo_IfNotFinded(CMethodProps &props, UInt32 numThreads); static void SetMethodThreadsTo_Replace(CMethodProps &props, UInt32 numThreads); - #endif + + static void Set_Method_NumThreadGroups_IfNotFinded(CMethodProps &props, UInt32 numThreadGroups); +#endif unsigned GetNumEmptyMethods() const diff --git a/CPP/7zip/Archive/Common/ItemNameUtils.cpp b/CPP/7zip/Archive/Common/ItemNameUtils.cpp index 89f84f7..f448c66 100644 --- 
a/CPP/7zip/Archive/Common/ItemNameUtils.cpp +++ b/CPP/7zip/Archive/Common/ItemNameUtils.cpp @@ -47,6 +47,25 @@ UString GetOsPath_Remove_TailSlash(const UString &name) } +#if WCHAR_PATH_SEPARATOR != L'/' +void ReplaceToWinSlashes(UString &name, bool useBackslashReplacement) +{ + // name.Replace(kUnixPathSepar, kOsPathSepar); + const unsigned len = name.Len(); + for (unsigned i = 0; i < len; i++) + { + wchar_t c = name[i]; + if (c == L'/') + c = WCHAR_PATH_SEPARATOR; + else if (useBackslashReplacement && c == L'\\') + c = WCHAR_IN_FILE_NAME_BACKSLASH_REPLACEMENT; // WSL scheme + else + continue; + name.ReplaceOneCharAtPos(i, c); + } +} +#endif + void ReplaceToOsSlashes_Remove_TailSlash(UString &name, bool #if WCHAR_PATH_SEPARATOR != L'/' useBackslashReplacement @@ -57,21 +76,7 @@ void ReplaceToOsSlashes_Remove_TailSlash(UString &name, bool return; #if WCHAR_PATH_SEPARATOR != L'/' - { - // name.Replace(kUnixPathSepar, kOsPathSepar); - const unsigned len = name.Len(); - for (unsigned i = 0; i < len; i++) - { - wchar_t c = name[i]; - if (c == L'/') - c = WCHAR_PATH_SEPARATOR; - else if (useBackslashReplacement && c == L'\\') - c = WCHAR_IN_FILE_NAME_BACKSLASH_REPLACEMENT; // WSL scheme - else - continue; - name.ReplaceOneCharAtPos(i, c); - } - } + ReplaceToWinSlashes(name, useBackslashReplacement); #endif if (name.Back() == kOsPathSepar) diff --git a/CPP/7zip/Archive/Common/ItemNameUtils.h b/CPP/7zip/Archive/Common/ItemNameUtils.h index 96aedeb..e344e5d 100644 --- a/CPP/7zip/Archive/Common/ItemNameUtils.h +++ b/CPP/7zip/Archive/Common/ItemNameUtils.h @@ -13,6 +13,9 @@ void ReplaceSlashes_OsToUnix(UString &name); UString GetOsPath(const UString &name); UString GetOsPath_Remove_TailSlash(const UString &name); +#if WCHAR_PATH_SEPARATOR != L'/' +void ReplaceToWinSlashes(UString &name, bool useBackslashReplacement); +#endif void ReplaceToOsSlashes_Remove_TailSlash(UString &name, bool useBackslashReplacement = false); void NormalizeSlashes_in_FileName_for_OsPath(wchar_t *s, 
unsigned len); void NormalizeSlashes_in_FileName_for_OsPath(UString &name); diff --git a/CPP/7zip/Archive/XzHandler.cpp b/CPP/7zip/Archive/XzHandler.cpp index a3839f5..22fca3f 100644 --- a/CPP/7zip/Archive/XzHandler.cpp +++ b/CPP/7zip/Archive/XzHandler.cpp @@ -446,7 +446,7 @@ void COpenCallbackWrap::Init(IArchiveOpenCallback *callback) struct CXzsCPP { CXzs p; - CXzsCPP() { Xzs_Construct(&p); } + CXzsCPP() { Xzs_CONSTRUCT(&p) } ~CXzsCPP() { Xzs_Free(&p, &g_Alloc); } }; @@ -536,6 +536,9 @@ HRESULT CHandler::Open2(IInStream *inStream, /* UInt32 flags, */ IArchiveOpenCal if (res2 == SZ_ERROR_ARCHIVE) return S_FALSE; + // what codes are possible here ? + // ?? res2 == SZ_ERROR_MEM : is possible here + // ?? res2 == SZ_ERROR_UNSUPPORTED : is possible here } else if (!isIndex) { @@ -1159,6 +1162,13 @@ Z7_COM7F_IMF(CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt */ #ifndef Z7_ST + +#ifdef _WIN32 + // we don't use chunk multithreading inside lzma2 stream. + // so we don't set xzProps.lzma2Props.numThreadGroups. 
+ if (_numThreadGroups > 1) + xzProps.numThreadGroups = _numThreadGroups; +#endif UInt32 numThreads = _numThreads; @@ -1183,6 +1193,8 @@ Z7_COM7F_IMF(CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt CMultiMethodProps::SetMethodThreadsTo_IfNotFinded(oneMethodInfo, numThreads); } + // printf("\n====== GetProcessGroupAffinity : \n"); + UInt64 cs = _numSolidBytes; if (cs != XZ_PROPS_BLOCK_SIZE_AUTO) oneMethodInfo.AddProp_BlockSize2(cs); diff --git a/CPP/7zip/Bundles/Alone7z/makefile b/CPP/7zip/Bundles/Alone7z/makefile index 296258e..15792da 100644 --- a/CPP/7zip/Bundles/Alone7z/makefile +++ b/CPP/7zip/Bundles/Alone7z/makefile @@ -148,7 +148,6 @@ C_OBJS = \ $O\LzmaEnc.obj \ $O\MtCoder.obj \ $O\MtDec.obj \ - $O\Sort.obj \ $O\SwapBytes.obj \ $O\Threads.obj \ $O\Xz.obj \ @@ -164,5 +163,6 @@ C_OBJS = \ !include "../../LzFindOpt.mak" !include "../../LzmaDec.mak" !include "../../Sha256.mak" +!include "../../Sort.mak" !include "../../7zip.mak" diff --git a/CPP/7zip/Bundles/SFXSetup/SfxSetup.cpp b/CPP/7zip/Bundles/SFXSetup/SfxSetup.cpp index 452fb24..b5ef104 100644 --- a/CPP/7zip/Bundles/SFXSetup/SfxSetup.cpp +++ b/CPP/7zip/Bundles/SFXSetup/SfxSetup.cpp @@ -229,7 +229,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE /* hPrevInstance */, } const FString tempDirPath = tempDir.GetPath(); - // tempDirPath = L"M:\\1\\"; // to test low disk space + // tempDirPath = "M:\\1\\"; // to test low disk space { bool isCorrupt = false; UString errorMessage; @@ -308,7 +308,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE /* hPrevInstance */, { if (appLaunched.IsEmpty()) { - appLaunched = L"setup.exe"; + appLaunched = "setup.exe"; if (!NFind::DoesFileExist_FollowLink(us2fs(appLaunched))) { if (!assumeYes) diff --git a/CPP/7zip/Common/InBuffer.h b/CPP/7zip/Common/InBuffer.h index 21afd9f..b851b9e 100644 --- a/CPP/7zip/Common/InBuffer.h +++ b/CPP/7zip/Common/InBuffer.h @@ -97,6 +97,16 @@ public: size_t ReadBytesPart(Byte *buf, size_t size); size_t ReadBytes(Byte 
*buf, size_t size); + const Byte *Lookahead(size_t &rem) + { + rem = (size_t)(_bufLim - _buf); + if (!rem) + { + ReadBlock(); + rem = (size_t)(_bufLim - _buf); + } + return _buf; + } size_t Skip(size_t size); }; diff --git a/CPP/7zip/Common/MethodProps.cpp b/CPP/7zip/Common/MethodProps.cpp index 70aab9f..7c153b3 100644 --- a/CPP/7zip/Common/MethodProps.cpp +++ b/CPP/7zip/Common/MethodProps.cpp @@ -324,15 +324,22 @@ void CCoderProps::AddProp(const CProp &prop) HRESULT CProps::SetCoderProps(ICompressSetCoderProperties *scp, const UInt64 *dataSizeReduce) const { - return SetCoderProps_DSReduce_Aff(scp, dataSizeReduce, NULL); + return SetCoderProps_DSReduce_Aff(scp, dataSizeReduce, NULL, NULL, NULL); } HRESULT CProps::SetCoderProps_DSReduce_Aff( ICompressSetCoderProperties *scp, const UInt64 *dataSizeReduce, - const UInt64 *affinity) const + const UInt64 *affinity, + const UInt32 *affinityGroup, + const UInt64 *affinityInGroup) const { - CCoderProps coderProps(Props.Size() + (dataSizeReduce ? 1 : 0) + (affinity ? 1 : 0) ); + CCoderProps coderProps(Props.Size() + + (dataSizeReduce ? 1 : 0) + + (affinity ? 1 : 0) + + (affinityGroup ? 1 : 0) + + (affinityInGroup ? 
1 : 0) + ); FOR_VECTOR (i, Props) coderProps.AddProp(Props[i]); if (dataSizeReduce) @@ -349,6 +356,20 @@ HRESULT CProps::SetCoderProps_DSReduce_Aff( prop.Value = *affinity; coderProps.AddProp(prop); } + if (affinityGroup) + { + CProp prop; + prop.Id = NCoderPropID::kThreadGroup; + prop.Value = *affinityGroup; + coderProps.AddProp(prop); + } + if (affinityInGroup) + { + CProp prop; + prop.Id = NCoderPropID::kAffinityInGroup; + prop.Value = *affinityInGroup; + coderProps.AddProp(prop); + } return coderProps.SetProps(scp); } @@ -409,6 +430,11 @@ static const CNameToPropID g_NameToPropID[] = { VT_UI4, "offset" }, { VT_UI4, "zhb" } /* + , { VT_UI4, "tgn" }, // kNumThreadGroups + , { VT_UI4, "tgi" }, // kThreadGroup + , { VT_UI8, "tga" }, // kAffinityInGroup + */ + /* , // { VT_UI4, "zhc" }, // { VT_UI4, "zhd" }, diff --git a/CPP/7zip/Common/MethodProps.h b/CPP/7zip/Common/MethodProps.h index c06291b..31ed9eb 100644 --- a/CPP/7zip/Common/MethodProps.h +++ b/CPP/7zip/Common/MethodProps.h @@ -80,7 +80,11 @@ struct CProps } HRESULT SetCoderProps(ICompressSetCoderProperties *scp, const UInt64 *dataSizeReduce = NULL) const; - HRESULT SetCoderProps_DSReduce_Aff(ICompressSetCoderProperties *scp, const UInt64 *dataSizeReduce, const UInt64 *affinity) const; + HRESULT SetCoderProps_DSReduce_Aff(ICompressSetCoderProperties *scp, + const UInt64 *dataSizeReduce, + const UInt64 *affinity, + const UInt32 *affinityGroup, + const UInt64 *affinityInGroup) const; }; class CMethodProps: public CProps diff --git a/CPP/7zip/Common/OutBuffer.h b/CPP/7zip/Common/OutBuffer.h index 6564eca..23dd76a 100644 --- a/CPP/7zip/Common/OutBuffer.h +++ b/CPP/7zip/Common/OutBuffer.h @@ -45,6 +45,7 @@ public: HRESULT Flush() throw(); void FlushWithCheck(); + Z7_FORCE_INLINE void WriteByte(Byte b) { UInt32 pos = _pos; @@ -54,10 +55,34 @@ public: if (pos == _limitPos) FlushWithCheck(); } + void WriteBytes(const void *data, size_t size) { - for (size_t i = 0; i < size; i++) - WriteByte(((const Byte *)data)[i]); 
+ while (size) + { + UInt32 pos = _pos; + size_t cur = (size_t)(_limitPos - pos); + if (cur >= size) + cur = size; + size -= cur; + Byte *dest = _buf + pos; + pos += (UInt32)cur; + _pos = pos; +#if 0 + memcpy(dest, data, cur); + data = (const void *)((const Byte *)data + cur); +#else + const Byte * const lim = (const Byte *)data + cur; + do + { + *dest++ = *(const Byte *)data; + data = (const void *)((const Byte *)data + 1); + } + while (data != lim); +#endif + if (pos == _limitPos) + FlushWithCheck(); + } } Byte *GetOutBuffer(size_t &avail) diff --git a/CPP/7zip/Compress/Lzma2Encoder.cpp b/CPP/7zip/Compress/Lzma2Encoder.cpp index 20af349..1498663 100644 --- a/CPP/7zip/Compress/Lzma2Encoder.cpp +++ b/CPP/7zip/Compress/Lzma2Encoder.cpp @@ -52,7 +52,15 @@ HRESULT SetLzma2Prop(PROPID propID, const PROPVARIANT &prop, CLzma2EncProps &lzm case NCoderPropID::kNumThreads: if (prop.vt != VT_UI4) return E_INVALIDARG; - lzma2Props.numTotalThreads = (int)(prop.ulVal); + lzma2Props.numTotalThreads = (int)prop.ulVal; + break; + case NCoderPropID::kNumThreadGroups: + if (prop.vt != VT_UI4) + return E_INVALIDARG; + // 16-bit value supported by Windows + if (prop.ulVal >= (1u << 16)) + return E_INVALIDARG; + lzma2Props.numThreadGroups = (unsigned)prop.ulVal; break; default: RINOK(NLzma::SetLzmaProp(propID, prop, lzma2Props.lzmaProps)) diff --git a/CPP/7zip/Compress/LzmaEncoder.cpp b/CPP/7zip/Compress/LzmaEncoder.cpp index 7ca03f1..2ce6f1e 100644 --- a/CPP/7zip/Compress/LzmaEncoder.cpp +++ b/CPP/7zip/Compress/LzmaEncoder.cpp @@ -101,6 +101,24 @@ HRESULT SetLzmaProp(PROPID propID, const PROPVARIANT &prop, CLzmaEncProps &ep) return S_OK; } + if (propID == NCoderPropID::kAffinityInGroup) + { + if (prop.vt == VT_UI8) + ep.affinityInGroup = prop.uhVal.QuadPart; + else + return E_INVALIDARG; + return S_OK; + } + + if (propID == NCoderPropID::kThreadGroup) + { + if (prop.vt == VT_UI4) + ep.affinityGroup = (Int32)(UInt32)prop.ulVal; + else + return E_INVALIDARG; + return S_OK; + } + if 
(propID == NCoderPropID::kHashBits) { if (prop.vt == VT_UI4) diff --git a/CPP/7zip/Crypto/MyAes.cpp b/CPP/7zip/Crypto/MyAes.cpp index 1244d44..511f2e3 100644 --- a/CPP/7zip/Crypto/MyAes.cpp +++ b/CPP/7zip/Crypto/MyAes.cpp @@ -153,7 +153,26 @@ Z7_COM7F_IMF2(UInt32, CAesCtrCoder::Filter(Byte *data, UInt32 size)) #ifndef Z7_EXTRACT_ONLY #ifdef MY_CPU_X86_OR_AMD64 - #define USE_HW_AES + + #if defined(__INTEL_COMPILER) + #if (__INTEL_COMPILER >= 1110) + #define USE_HW_AES + #if (__INTEL_COMPILER >= 1900) + #define USE_HW_VAES + #endif + #endif + #elif defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40400) + #define USE_HW_AES + #if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 8) + #define USE_HW_VAES + #endif + #elif defined(_MSC_VER) + #define USE_HW_AES + #define USE_HW_VAES + #endif + #elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) #if defined(__ARM_FEATURE_AES) \ @@ -186,15 +205,15 @@ Z7_COM7F_IMF2(UInt32, CAesCtrCoder::Filter(Byte *data, UInt32 size)) #define SET_AES_FUNC_2(f2) \ if (algo == 2) if (g_Aes_SupportedFunctions_Flags & k_Aes_SupportedFunctions_HW) \ { f = f2; } - #ifdef MY_CPU_X86_OR_AMD64 + #ifdef USE_HW_VAES #define SET_AES_FUNC_23(f2, f3) \ SET_AES_FUNC_2(f2) \ if (algo == 3) if (g_Aes_SupportedFunctions_Flags & k_Aes_SupportedFunctions_HW_256) \ { f = f3; } - #else // MY_CPU_X86_OR_AMD64 + #else // USE_HW_VAES #define SET_AES_FUNC_23(f2, f3) \ SET_AES_FUNC_2(f2) - #endif // MY_CPU_X86_OR_AMD64 + #endif // USE_HW_VAES #else // USE_HW_AES #define SET_AES_FUNC_23(f2, f3) #endif // USE_HW_AES diff --git a/CPP/7zip/ICoder.h b/CPP/7zip/ICoder.h index def2fc3..9810d23 100644 --- a/CPP/7zip/ICoder.h +++ b/CPP/7zip/ICoder.h @@ -136,6 +136,9 @@ namespace NCoderPropID kAffinity, // VT_UI8 kBranchOffset, // VT_UI4 kHashBits, // VT_UI4 + kNumThreadGroups, // VT_UI4 + kThreadGroup, // VT_UI4 + kAffinityInGroup, // VT_UI8 /* // kHash3Bits, // VT_UI4 // 
kHash2Bits, // VT_UI4 diff --git a/CPP/7zip/Sort.mak b/CPP/7zip/Sort.mak new file mode 100644 index 0000000..14f1feb --- /dev/null +++ b/CPP/7zip/Sort.mak @@ -0,0 +1,6 @@ +!IF defined(USE_NO_ASM) || defined(USE_C_SORT) || "$(PLATFORM)" == "ia64" || "$(PLATFORM)" == "mips" || "$(PLATFORM)" == "arm" || "$(PLATFORM)" == "arm64" +C_OBJS = $(C_OBJS) \ +!ELSE +ASM_OBJS = $(ASM_OBJS) \ +!ENDIF + $O\Sort.obj diff --git a/CPP/7zip/UI/Client7z/makefile.gcc b/CPP/7zip/UI/Client7z/makefile.gcc index d7fd1bf..c07cb5e 100644 --- a/CPP/7zip/UI/Client7z/makefile.gcc +++ b/CPP/7zip/UI/Client7z/makefile.gcc @@ -24,7 +24,6 @@ else SYS_OBJS = \ $O/MyWindows.o \ - $O/TimeUtils.o \ endif @@ -53,6 +52,7 @@ WIN_OBJS = \ $O/FileName.o \ $O/PropVariant.o \ $O/PropVariantConv.o \ + $O/TimeUtils.o \ 7ZIP_COMMON_OBJS = \ $O/FileStreams.o \ diff --git a/CPP/7zip/UI/Common/ArchiveCommandLine.cpp b/CPP/7zip/UI/Common/ArchiveCommandLine.cpp index 45baca5..a99bb0d 100644 --- a/CPP/7zip/UI/Common/ArchiveCommandLine.cpp +++ b/CPP/7zip/UI/Common/ArchiveCommandLine.cpp @@ -63,17 +63,46 @@ EXTERN_C_END #else -// #define MY_isatty_fileno(x) (isatty(fileno(x))) -// #define MY_IS_TERMINAL(x) (MY_isatty_fileno(x) != 0); -static inline bool MY_IS_TERMINAL(FILE *x) +static bool MY_IS_TERMINAL(FILE *x) { - return ( - #if defined(_MSC_VER) && (_MSC_VER >= 1400) - _isatty(_fileno(x)) - #else - isatty(fileno(x)) - #endif - != 0); +#ifdef _WIN32 + /* +crt/stdio.h: +typedef struct _iobuf FILE; +#define stdin (&_iob[0]) +#define stdout (&_iob[1]) +#define stderr (&_iob[2]) +*/ + // fprintf(stderr, "\nMY_IS_TERMINAL = %p", x); + const int fd = _fileno(x); + /* (fd) is 0, 1 or 2 in console program. + docs: If stdout or stderr is not associated with + an output stream (for example, in a Windows application + without a console window), the file descriptor returned is -2. + In previous versions, the file descriptor returned was -1. 
+ */ + if (fd < 0) // is not associated with an output stream application (without a console window) + return false; + // fprintf(stderr, "\n\nstderr _fileno(%p) = %d", x, fd); + if (!_isatty(fd)) + return false; + // fprintf(stderr, "\nisatty_val = true"); + const HANDLE h = (HANDLE)_get_osfhandle(fd); + /* _get_osfhandle() returns intptr_t in new SDK, or long in MSVC6. + Also it can return (INVALID_HANDLE_VALUE). + docs: _get_osfhandle also returns the special value -2 when + the file descriptor is not associated with a stream + in old msvcrt.dll: it returns (-1) for incorrect value + */ + // fprintf(stderr, "\n_get_osfhandle() = %p", (void *)h); + if (h == NULL || h == INVALID_HANDLE_VALUE) + return false; + DWORD st; + // fprintf(stderr, "\nGetConsoleMode() = %u", (unsigned)GetConsoleMode(h, &st)); + return GetConsoleMode(h, &st) != 0; +#else + return isatty(fileno(x)) != 0; +#endif } #endif @@ -312,7 +341,7 @@ static const CSwitchForm kSwitchForms[] = { "spf", SWFRM_STRING_SINGL(0) }, { "snh", SWFRM_MINUS }, - { "snld", SWFRM_MINUS }, + { "snld", SWFRM_STRING }, { "snl", SWFRM_MINUS }, { "sni", SWFRM_SIMPLE }, @@ -1088,7 +1117,7 @@ void CArcCmdLineParser::Parse1(const UStringVector &commandStrings, const UString &s = parser[NKey::kLargePages].PostStrings[0]; if (s.IsEmpty()) slp = 1; - else if (s != L"-") + else if (!s.IsEqualTo("-")) { if (!StringToUInt32(s, slp)) throw CArcCmdLineException("Unsupported switch postfix for -slp", s); @@ -1338,7 +1367,7 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options) const UString &s = parser[NKey::kFullPathMode].PostStrings[0]; if (!s.IsEmpty()) { - if (s == L"2") + if (s.IsEqualTo("2")) censorPathMode = NWildcard::k_FullPath; else throw CArcCmdLineException("Unsupported -spf:", s); @@ -1400,6 +1429,7 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options) const bool isExtractGroupCommand = options.Command.IsFromExtractGroup(); const bool isExtractOrList = isExtractGroupCommand || 
options.Command.CommandType == NCommandType::kList; const bool isRename = options.Command.CommandType == NCommandType::kRename; + options.UpdateOptions.RenameMode = isRename; if ((isExtractOrList || isRename) && options.StdInMode) thereIsArchiveName = false; @@ -1449,14 +1479,8 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options) SetBoolPair(parser, NKey::kStoreOwnerId, options.StoreOwnerId); SetBoolPair(parser, NKey::kStoreOwnerName, options.StoreOwnerName); - - CBoolPair symLinks_AllowDangerous; - SetBoolPair(parser, NKey::kSymLinks_AllowDangerous, symLinks_AllowDangerous); - - /* bool supportSymLink = options.SymLinks.Val; - if (!options.SymLinks.Def) { if (isExtractOrList) @@ -1464,7 +1488,6 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options) else supportSymLink = false; } - #ifdef ENV_HAVE_LSTAT if (supportSymLink) global_use_lstat = 1; @@ -1473,7 +1496,6 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options) #endif */ - if (isExtractOrList) { CExtractOptionsBase &eo = options.ExtractOptions; @@ -1497,7 +1519,15 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options) if (!options.SymLinks.Def) nt.SymLinks.Val = true; - nt.SymLinks_AllowDangerous = symLinks_AllowDangerous; + if (parser[NKey::kSymLinks_AllowDangerous].ThereIs) + { + const UString &s = parser[NKey::kSymLinks_AllowDangerous].PostStrings[0]; + UInt32 v = 9; // default value for "-snld" instead of default = 5 without "-snld". 
+ if (!s.IsEmpty()) + if (!StringToUInt32(s, v)) + throw CArcCmdLineException("Unsupported switch postfix -snld", s); + nt.SymLinks_DangerousLevel = (unsigned)v; + } nt.ReplaceColonForAltStream = parser[NKey::kReplaceColonForAltStream].ThereIs; nt.WriteToAltStreamIfColon = parser[NKey::kWriteToAltStreamIfColon].ThereIs; @@ -1516,9 +1546,9 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options) const UString &s = parser[NKey::kZoneFile].PostStrings[0]; if (!s.IsEmpty()) { - if (s == L"0") eo.ZoneMode = NExtract::NZoneIdMode::kNone; - else if (s == L"1") eo.ZoneMode = NExtract::NZoneIdMode::kAll; - else if (s == L"2") eo.ZoneMode = NExtract::NZoneIdMode::kOffice; + if (s.IsEqualTo("0")) eo.ZoneMode = NExtract::NZoneIdMode::kNone; + else if (s.IsEqualTo("1")) eo.ZoneMode = NExtract::NZoneIdMode::kAll; + else if (s.IsEqualTo("2")) eo.ZoneMode = NExtract::NZoneIdMode::kOffice; else throw CArcCmdLineException("Unsupported -snz:", s); } diff --git a/CPP/7zip/UI/Common/ArchiveExtractCallback.cpp b/CPP/7zip/UI/Common/ArchiveExtractCallback.cpp index d8b473c..9de6d90 100644 --- a/CPP/7zip/UI/Common/ArchiveExtractCallback.cpp +++ b/CPP/7zip/UI/Common/ArchiveExtractCallback.cpp @@ -6,12 +6,10 @@ #undef printf // #include -// #include "../../../../C/CpuTicks.h" #include "../../../../C/Alloc.h" #include "../../../../C/CpuArch.h" - #include "../../../Common/ComTry.h" #include "../../../Common/IntToString.h" #include "../../../Common/StringConvert.h" @@ -33,6 +31,8 @@ #include "../../Common/FilePathAutoRename.h" #include "../../Common/StreamUtils.h" +#include "../../Archive/Common/ItemNameUtils.h" + #include "../Common/ExtractingFilePath.h" #include "../Common/PropIDUtils.h" @@ -54,6 +54,23 @@ static const char * const kCantSetFileLen = "Cannot set length for output file"; #ifdef SUPPORT_LINKS static const char * const kCantCreateHardLink = "Cannot create hard link"; static const char * const kCantCreateSymLink = "Cannot create symbolic link"; +static const char * const 
k_HardLink_to_SymLink_Ignored = "Hard link to symbolic link was ignored"; +static const char * const k_CantDelete_File_for_SymLink = "Cannot delete file for symbolic link creation"; +static const char * const k_CantDelete_Dir_for_SymLink = "Cannot delete directory for symbolic link creation"; +#endif + +static const unsigned k_LinkDataSize_LIMIT = 1 << 12; + +#ifdef SUPPORT_LINKS +#if WCHAR_PATH_SEPARATOR != L'/' + // we convert linux slashes to windows slashes for further processing. + // also we convert linux backslashes to BackslashReplacement character. + #define REPLACE_SLASHES_from_Linux_to_Sys(s) \ + { NArchive::NItemName::ReplaceToWinSlashes(s, true); } // useBackslashReplacement + // { s.Replace(L'/', WCHAR_PATH_SEPARATOR); } +#else + #define REPLACE_SLASHES_from_Linux_to_Sys(s) +#endif #endif #ifndef Z7_SFX @@ -217,7 +234,7 @@ HRESULT CArchiveExtractCallback::PrepareHardLinks(const CRecordVector *r if (!_arc->Ask_INode) return S_OK; - IInArchive *archive = _arc->Archive; + IInArchive * const archive = _arc->Archive; CRecordVector &hardIDs = _hardLinks.IDs; { @@ -313,13 +330,14 @@ void CArchiveExtractCallback::Init( _outFileStream.Release(); _bufPtrSeqOutStream.Release(); - #ifdef SUPPORT_LINKS +#ifdef SUPPORT_LINKS _hardLinks.Clear(); - #endif + _postLinks.Clear(); +#endif - #ifdef SUPPORT_ALT_STREAMS +#ifdef SUPPORT_ALT_STREAMS _renamedFiles.Clear(); - #endif +#endif _ntOptions = ntOptions; _wildcardCensor = wildcardCensor; @@ -442,7 +460,8 @@ Z7_COM7F_IMF(CArchiveExtractCallback::SetRatioInfo(const UInt64 *inSize, const U } -void CArchiveExtractCallback::CreateComplexDirectory(const UStringVector &dirPathParts, FString &fullPath) +void CArchiveExtractCallback::CreateComplexDirectory( + const UStringVector &dirPathParts, bool isFinal, FString &fullPath) { // we use (_item.IsDir) in this function @@ -474,7 +493,7 @@ void CArchiveExtractCallback::CreateComplexDirectory(const UStringVector &dirPat const UString &s = dirPathParts[i]; fullPath += us2fs(s); - 
const bool isFinalDir = (i == dirPathParts.Size() - 1 && _item.IsDir); + const bool isFinalDir = (i == dirPathParts.Size() - 1 && isFinal && _item.IsDir); if (fullPath.IsEmpty()) { @@ -535,7 +554,7 @@ static void AddPathToMessage(UString &s, const FString &path) s += fs2us(path); } -HRESULT CArchiveExtractCallback::SendMessageError(const char *message, const FString &path) +HRESULT CArchiveExtractCallback::SendMessageError(const char *message, const FString &path) const { UString s (message); AddPathToMessage(s, path); @@ -543,7 +562,7 @@ HRESULT CArchiveExtractCallback::SendMessageError(const char *message, const FSt } -HRESULT CArchiveExtractCallback::SendMessageError_with_Error(HRESULT errorCode, const char *message, const FString &path) +HRESULT CArchiveExtractCallback::SendMessageError_with_Error(HRESULT errorCode, const char *message, const FString &path) const { UString s (message); if (errorCode != S_OK) @@ -555,13 +574,13 @@ HRESULT CArchiveExtractCallback::SendMessageError_with_Error(HRESULT errorCode, return _extractCallback2->MessageError(s); } -HRESULT CArchiveExtractCallback::SendMessageError_with_LastError(const char *message, const FString &path) +HRESULT CArchiveExtractCallback::SendMessageError_with_LastError(const char *message, const FString &path) const { const HRESULT errorCode = GetLastError_noZero_HRESULT(); return SendMessageError_with_Error(errorCode, message, path); } -HRESULT CArchiveExtractCallback::SendMessageError2(HRESULT errorCode, const char *message, const FString &path1, const FString &path2) +HRESULT CArchiveExtractCallback::SendMessageError2(HRESULT errorCode, const char *message, const FString &path1, const FString &path2) const { UString s (message); if (errorCode != 0) @@ -574,6 +593,13 @@ HRESULT CArchiveExtractCallback::SendMessageError2(HRESULT errorCode, const char return _extractCallback2->MessageError(s); } +HRESULT CArchiveExtractCallback::SendMessageError2_with_LastError( + const char *message, const FString &path1, 
const FString &path2) const +{ + const HRESULT errorCode = GetLastError_noZero_HRESULT(); + return SendMessageError2(errorCode, message, path1, path2); +} + #ifndef Z7_SFX Z7_CLASS_IMP_COM_1( @@ -604,38 +630,25 @@ Z7_COM7F_IMF(CGetProp::GetProp(PROPID propID, PROPVARIANT *value)) #endif // Z7_SFX -#ifdef SUPPORT_LINKS - -static UString GetDirPrefixOf(const UString &src) -{ - UString s (src); - if (!s.IsEmpty()) - { - if (IsPathSepar(s.Back())) - s.DeleteBack(); - int pos = s.ReverseFind_PathSepar(); - s.DeleteFrom((unsigned)(pos + 1)); - } - return s; -} - -#endif // SUPPORT_LINKS - struct CLinkLevelsInfo { bool IsAbsolute; + bool ParentDirDots_after_NonParent; int LowLevel; int FinalLevel; - void Parse(const UString &path); + void Parse(const UString &path, bool isWSL); }; -void CLinkLevelsInfo::Parse(const UString &path) +void CLinkLevelsInfo::Parse(const UString &path, bool isWSL) { - IsAbsolute = NName::IsAbsolutePath(path); - + IsAbsolute = isWSL ? + IS_PATH_SEPAR(path[0]) : + NName::IsAbsolutePath(path); LowLevel = 0; FinalLevel = 0; + ParentDirDots_after_NonParent = false; + bool nonParentDir = false; UStringVector parts; SplitPathToParts(path, parts); @@ -650,32 +663,41 @@ void CLinkLevelsInfo::Parse(const UString &path) IsAbsolute = true; continue; } - if (s == L".") + if (s.IsEqualTo(".")) continue; - if (s == L"..") + if (s.IsEqualTo("..")) { + if (IsAbsolute || nonParentDir) + ParentDirDots_after_NonParent = true; level--; if (LowLevel > level) - LowLevel = level; + LowLevel = level; } else + { + nonParentDir = true; level++; + } } FinalLevel = level; } -bool IsSafePath(const UString &path); -bool IsSafePath(const UString &path) +static bool IsSafePath(const UString &path, bool isWSL) { CLinkLevelsInfo levelsInfo; - levelsInfo.Parse(path); + levelsInfo.Parse(path, isWSL); return !levelsInfo.IsAbsolute && levelsInfo.LowLevel >= 0 && levelsInfo.FinalLevel > 0; } +bool IsSafePath(const UString &path); +bool IsSafePath(const UString &path) +{ + return 
IsSafePath(path, false); // isWSL +} bool CensorNode_CheckPath2(const NWildcard::CCensorNode &node, const CReadArcItem &item, bool &include); bool CensorNode_CheckPath2(const NWildcard::CCensorNode &node, const CReadArcItem &item, bool &include) @@ -791,159 +813,113 @@ HRESULT CArchiveExtractCallback::MyCopyFile(ISequentialOutStream *outStream) HRESULT CArchiveExtractCallback::ReadLink() { - IInArchive *archive = _arc->Archive; + IInArchive * const archive = _arc->Archive; const UInt32 index = _index; - _link.Clear(); - + // _link.Clear(); // _link.Clear() was called already. { NCOM::CPropVariant prop; RINOK(archive->GetProperty(index, kpidHardLink, &prop)) if (prop.vt == VT_BSTR) { - _link.isHardLink = true; - // _link.isCopyLink = false; + _link.LinkType = k_LinkType_HardLink; _link.isRelative = false; // RAR5, TAR: hard links are from root folder of archive - _link.linkPath.SetFromBstr(prop.bstrVal); + _link.LinkPath.SetFromBstr(prop.bstrVal); + // 7-Zip 24-: tar handler returned original path (with linux slash in most case) + // 7-Zip 24-: rar5 handler returned path with system slash. + // 7-Zip 25+: tar/rar5 handlers return linux path in most cases. 
} else if (prop.vt != VT_EMPTY) return E_FAIL; } - /* { NCOM::CPropVariant prop; RINOK(archive->GetProperty(index, kpidCopyLink, &prop)); if (prop.vt == VT_BSTR) { - _link.isHardLink = false; - _link.isCopyLink = true; + _link.LinkType = k_LinkType_CopyLink; _link.isRelative = false; // RAR5: copy links are from root folder of archive - _link.linkPath.SetFromBstr(prop.bstrVal); + _link.LinkPath.SetFromBstr(prop.bstrVal); } else if (prop.vt != VT_EMPTY) return E_FAIL; } */ - { NCOM::CPropVariant prop; RINOK(archive->GetProperty(index, kpidSymLink, &prop)) if (prop.vt == VT_BSTR) { - _link.isHardLink = false; - // _link.isCopyLink = false; - _link.isRelative = true; // RAR5, TAR: symbolic links can be relative - _link.linkPath.SetFromBstr(prop.bstrVal); + _link.LinkType = k_LinkType_PureSymLink; + _link.isRelative = true; // RAR5, TAR: symbolic links are relative by default + _link.LinkPath.SetFromBstr(prop.bstrVal); + // 7-Zip 24-: (tar, cpio, xar, ext, iso) handlers returned original path (with linux slash in most case) + // 7-Zip 24-: rar5 handler returned path with system slash. + // 7-Zip 25+: all handlers return linux path in most cases. } else if (prop.vt != VT_EMPTY) return E_FAIL; } - NtReparse_Data = NULL; - NtReparse_Size = 0; - - if (_link.linkPath.IsEmpty() && _arc->GetRawProps) + // linux path separator in (_link.LinkPath) is expected for most cases, + // if new handler code is used, and if data in archive is correct.
+ // NtReparse_Data = NULL; + // NtReparse_Size = 0; + if (!_link.LinkPath.IsEmpty()) + { + REPLACE_SLASHES_from_Linux_to_Sys(_link.LinkPath) + } + else if (_arc->GetRawProps) { const void *data; - UInt32 dataSize; - UInt32 propType; - - _arc->GetRawProps->GetRawProp(_index, kpidNtReparse, &data, &dataSize, &propType); - - // if (dataSize == 1234567) // for debug: unpacking without reparse - if (dataSize != 0) + UInt32 dataSize, propType; + if (_arc->GetRawProps->GetRawProp(_index, kpidNtReparse, &data, &dataSize, &propType) == S_OK + // && dataSize == 1234567 // for debug: unpacking without reparse + && dataSize) { if (propType != NPropDataType::kRaw) return E_FAIL; - // 21.06: we need kpidNtReparse in linux for wim archives created in Windows - // #ifdef _WIN32 - - NtReparse_Data = data; - NtReparse_Size = dataSize; - - CReparseAttr reparse; - bool isOkReparse = reparse.Parse((const Byte *)data, dataSize); - if (isOkReparse) - { - _link.isHardLink = false; - // _link.isCopyLink = false; - _link.linkPath = reparse.GetPath(); - _link.isJunction = reparse.IsMountPoint(); - - if (reparse.IsSymLink_WSL()) - { - _link.isWSL = true; - _link.isRelative = reparse.IsRelative_WSL(); - } - else - _link.isRelative = reparse.IsRelative_Win(); - - // const AString s = GetAnsiString(_link.linkPath); - // printf("\n_link.linkPath: %s\n", s.Ptr()); - - #ifndef _WIN32 - _link.linkPath.Replace(L'\\', WCHAR_PATH_SEPARATOR); - #endif - } - // #endif + // NtReparse_Data = data; + // NtReparse_Size = dataSize; + // we ignore error code here, if there is failure of parsing: + _link.Parse_from_WindowsReparseData((const Byte *)data, dataSize); } } - if (_link.linkPath.IsEmpty()) + if (_link.LinkPath.IsEmpty()) return S_OK; - + // (_link.LinkPath) uses system path separator. + // windows: (_link.LinkPath) doesn't contain linux separator (slash). 
{ - #ifdef _WIN32 - _link.linkPath.Replace(L'/', WCHAR_PATH_SEPARATOR); - #endif - - // rar5 uses "\??\" prefix for absolute links - if (_link.linkPath.IsPrefixedBy(WSTRING_PATH_SEPARATOR L"??" WSTRING_PATH_SEPARATOR)) + // _link.LinkPath = "\\??\\r:\\1\\2"; // for debug + // rar5+ returns kpidSymLink absolute link path with "\??\" prefix. + // we normalize such prefix: + if (_link.LinkPath.IsPrefixedBy(STRING_PATH_SEPARATOR "??" STRING_PATH_SEPARATOR)) { _link.isRelative = false; - _link.linkPath.DeleteFrontal(4); - } - - for (;;) - // while (NName::IsAbsolutePath(linkPath)) - { - unsigned n = NName::GetRootPrefixSize(_link.linkPath); - if (n == 0) - break; - _link.isRelative = false; - _link.linkPath.DeleteFrontal(n); - } - } - - if (_link.linkPath.IsEmpty()) - return S_OK; - - if (!_link.isRelative && _removePathParts.Size() != 0) - { - UStringVector pathParts; - SplitPathToParts(_link.linkPath, pathParts); - bool badPrefix = false; - FOR_VECTOR (i, _removePathParts) - { - if (CompareFileNames(_removePathParts[i], pathParts[i]) != 0) + // we normalize prefix from "\??\" to "\\?\": + _link.LinkPath.ReplaceOneCharAtPos(1, WCHAR_PATH_SEPARATOR); + _link.isWindowsPath = true; + if (_link.LinkPath.IsPrefixedBy_Ascii_NoCase( + STRING_PATH_SEPARATOR + STRING_PATH_SEPARATOR "?" 
+ STRING_PATH_SEPARATOR "UNC" + STRING_PATH_SEPARATOR)) { - badPrefix = true; - break; + // we normalize prefix from "\\?\UNC\path" to "\\path": + _link.LinkPath.DeleteFrontal(6); + _link.LinkPath.ReplaceOneCharAtPos(0, WCHAR_PATH_SEPARATOR); + } + else + { + const unsigned k_prefix_Size = 4; + if (NName::IsDrivePath(_link.LinkPath.Ptr(k_prefix_Size))) + _link.LinkPath.DeleteFrontal(k_prefix_Size); } } - if (!badPrefix) - pathParts.DeleteFrontal(_removePathParts.Size()); - _link.linkPath = MakePathFromParts(pathParts); } - - /* - if (!_link.linkPath.IsEmpty()) - { - printf("\n_link %s to -> %s\n", GetOemString(_item.Path).Ptr(), GetOemString(_link.linkPath).Ptr()); - } - */ - + _link.Normalize_to_RelativeSafe(_removePathParts); return S_OK; } @@ -953,7 +929,7 @@ HRESULT CArchiveExtractCallback::ReadLink() #ifndef _WIN32 static HRESULT GetOwner(IInArchive *archive, - UInt32 index, UInt32 pidName, UInt32 pidId, COwnerInfo &res) + UInt32 index, UInt32 pidName, UInt32 pidId, CProcessedFileInfo::COwnerInfo &res) { { NWindows::NCOM::CPropVariant prop; @@ -961,7 +937,7 @@ static HRESULT GetOwner(IInArchive *archive, if (prop.vt == VT_UI4) { res.Id_Defined = true; - res.Id = prop.ulVal; // for debug + res.Id = prop.ulVal; // res.Id++; // for debug // if (pidId == kpidGroupId) res.Id += 7; // for debug // res.Id = 0; // for debug @@ -993,7 +969,7 @@ static HRESULT GetOwner(IInArchive *archive, HRESULT CArchiveExtractCallback::Read_fi_Props() { - IInArchive *archive = _arc->Archive; + IInArchive * const archive = _arc->Archive; const UInt32 index = _index; _fi.Attrib_Defined = false; @@ -1085,7 +1061,7 @@ void CArchiveExtractCallback::CorrectPathParts() } -void CArchiveExtractCallback::GetFiTimesCAM(CFiTimesCAM &pt) +static void GetFiTimesCAM(const CProcessedFileInfo &fi, CFiTimesCAM &pt, const CArc &arc) { pt.CTime_Defined = false; pt.ATime_Defined = false; @@ -1093,27 +1069,27 @@ void CArchiveExtractCallback::GetFiTimesCAM(CFiTimesCAM &pt) // if (Write_MTime) { - if 
(_fi.MTime.Def) + if (fi.MTime.Def) { - _fi.MTime.Write_To_FiTime(pt.MTime); + fi.MTime.Write_To_FiTime(pt.MTime); pt.MTime_Defined = true; } - else if (_arc->MTime.Def) + else if (arc.MTime.Def) { - _arc->MTime.Write_To_FiTime(pt.MTime); + arc.MTime.Write_To_FiTime(pt.MTime); pt.MTime_Defined = true; } } - if (/* Write_CTime && */ _fi.CTime.Def) + if (/* Write_CTime && */ fi.CTime.Def) { - _fi.CTime.Write_To_FiTime(pt.CTime); + fi.CTime.Write_To_FiTime(pt.CTime); pt.CTime_Defined = true; } - if (/* Write_ATime && */ _fi.ATime.Def) + if (/* Write_ATime && */ fi.ATime.Def) { - _fi.ATime.Write_To_FiTime(pt.ATime); + fi.ATime.Write_To_FiTime(pt.ATime); pt.ATime_Defined = true; } } @@ -1124,6 +1100,7 @@ void CArchiveExtractCallback::CreateFolders() // 21.04 : we don't change original (_item.PathParts) here UStringVector pathParts = _item.PathParts; + bool isFinal = true; // bool is_DirOp = false; if (!pathParts.IsEmpty()) { @@ -1133,12 +1110,15 @@ void CArchiveExtractCallback::CreateFolders() but if we create dir item here, it's not problem. */ if (!_item.IsDir #ifdef SUPPORT_LINKS - #ifndef WIN32 - || !_link.linkPath.IsEmpty() - #endif + // #ifndef WIN32 + || !_link.LinkPath.IsEmpty() + // #endif #endif ) + { pathParts.DeleteBack(); + isFinal = false; // last path part was excluded + } // else is_DirOp = true; } @@ -1162,7 +1142,7 @@ void CArchiveExtractCallback::CreateFolders() */ FString fullPathNew; - CreateComplexDirectory(pathParts, fullPathNew); + CreateComplexDirectory(pathParts, isFinal, fullPathNew); /* if (is_DirOp) @@ -1183,12 +1163,12 @@ void CArchiveExtractCallback::CreateFolders() return; CDirPathTime pt; - GetFiTimesCAM(pt); + GetFiTimesCAM(_fi, pt, *_arc); if (pt.IsSomeTimeDefined()) { pt.Path = fullPathNew; - pt.SetDirTime(); + pt.SetDirTime_to_FS_2(); _extractedFolders.Add(pt); } } @@ -1273,8 +1253,7 @@ HRESULT CArchiveExtractCallback::CheckExistFile(FString &fullProcessedPath, bool // MyMoveFile can rename folders. 
So it's OK to use it for folders too if (!MyMoveFile(fullProcessedPath, existPath)) { - HRESULT errorCode = GetLastError_noZero_HRESULT(); - RINOK(SendMessageError2(errorCode, kCantRenameFile, existPath, fullProcessedPath)) + RINOK(SendMessageError2_with_LastError(kCantRenameFile, existPath, fullProcessedPath)) return E_FAIL; } } @@ -1331,9 +1310,11 @@ HRESULT CArchiveExtractCallback::CheckExistFile(FString &fullProcessedPath, bool - - - +/* +return: + needExit = false: caller will use (outStreamLoc) and _hashStreamSpec + needExit = true : caller will not use (outStreamLoc) and _hashStreamSpec. +*/ HRESULT CArchiveExtractCallback::GetExtractStream(CMyComPtr &outStreamLoc, bool &needExit) { needExit = true; @@ -1341,7 +1322,7 @@ HRESULT CArchiveExtractCallback::GetExtractStream(CMyComPtrArchive; + IInArchive * const archive = _arc->Archive; #endif const UInt32 index = _index; @@ -1387,7 +1368,7 @@ HRESULT CArchiveExtractCallback::GetExtractStream(CMyComPtr 0 && _curSize < (1 << 12)) + if (_curSize_Defined && _curSize && _curSize < k_LinkDataSize_LIMIT) { if (_fi.IsLinuxSymLink()) { @@ -1513,7 +1498,7 @@ HRESULT CArchiveExtractCallback::GetExtractStream(CMyComPtrInit(_outMemBuf, _outMemBuf.Size()); outStreamLoc = _bufPtrSeqOutStream; } - else // not reprase + else // not reparse { if (_ntOptions.PreAllocateOutFile && !_isSplit && _curSize_Defined && _curSize > (1 << 12)) { @@ -1568,7 +1553,7 @@ HRESULT CArchiveExtractCallback::GetExtractStream(CMyComPtrSeek((Int64)_position, STREAM_SEEK_SET, NULL)) } outStreamLoc = outFileStream_Loc; - } // if not reprase + } // if not reparse _outFileStream = outFileStream_Loc; @@ -1620,8 +1605,7 @@ Z7_COM7F_IMF(CArchiveExtractCallback::GetStream(UInt32 index, ISequentialOutStre _fileLength_WasSet = false; _isRenamed = false; // _fi.Clear(); - _extractMode = false; - // _is_SymLink_in_Data = false; + _extractMode = false; _is_SymLink_in_Data_Linux = false; _needSetAttrib = false; _isSymLinkCreated = false; @@ -1661,7 +1645,7 @@ 
Z7_COM7F_IMF(CArchiveExtractCallback::GetStream(UInt32 index, ISequentialOutStre } - IInArchive *archive = _arc->Archive; + IInArchive * const archive = _arc->Archive; RINOK(GetItem(index)) @@ -1677,10 +1661,9 @@ Z7_COM7F_IMF(CArchiveExtractCallback::GetStream(UInt32 index, ISequentialOutStre } } - #ifdef SUPPORT_LINKS +#ifdef SUPPORT_LINKS RINOK(ReadLink()) - #endif // SUPPORT_LINKS - +#endif RINOK(Archive_GetItemBoolProp(archive, index, kpidEncrypted, _encrypted)) @@ -1989,7 +1972,7 @@ HRESULT CArchiveExtractCallback::CloseFile() #endif CFiTimesCAM t; - GetFiTimesCAM(t); + GetFiTimesCAM(_fi, t, *_arc); // #ifdef _WIN32 if (t.IsSomeTimeDefined()) @@ -2016,88 +1999,275 @@ HRESULT CArchiveExtractCallback::CloseFile() #ifdef SUPPORT_LINKS +static bool CheckLinkPath_in_FS_for_pathParts(const FString &path, const UStringVector &v) +{ + FString path2 = path; + FOR_VECTOR (i, v) + { + // if (i == v.Size() - 1) path = path2; // we don't need last part in returned path + path2 += us2fs(v[i]); + NFind::CFileInfo fi; + // printf("\nCheckLinkPath_in_FS_for_pathParts(): %s\n", GetOemString(path2).Ptr()); + if (fi.Find(path2) && fi.IsOsSymLink()) + return false; + path2.Add_PathSepar(); + } + return true; +} -HRESULT CArchiveExtractCallback::SetFromLinkPath( - const FString &fullProcessedPath, - const CLinkInfo &linkInfo, - bool &linkWasSet) +/* +link.isRelative / relative_item_PathPrefix + false / empty + true / item path without last part +*/ +static bool CheckLinkPath_in_FS( + const FString &pathPrefix_in_FS, + const CPostLink &postLink, + const UString &relative_item_PathPrefix) +{ + const CLinkInfo &link = postLink.LinkInfo; + if (postLink.item_PathParts.IsEmpty() || link.LinkPath.IsEmpty()) + return false; + FString path; + { + const UString &s = postLink.item_PathParts[0]; + if (!s.IsEmpty() && !NName::IsAbsolutePath(s)) + path = pathPrefix_in_FS; // item_PathParts is relative. 
So we use absolute prefix + } + if (!CheckLinkPath_in_FS_for_pathParts(path, postLink.item_PathParts)) + return false; + path += us2fs(relative_item_PathPrefix); + UStringVector v; + SplitPathToParts(link.LinkPath, v); + // we check target paths: + return CheckLinkPath_in_FS_for_pathParts(path, v); +} + +static const unsigned k_DangLevel_MAX_for_Link_over_Link = 9; + +HRESULT CArchiveExtractCallback::CreateHardLink2( + const FString &newFilePath, const FString &existFilePath, bool &link_was_Created) const +{ + link_was_Created = false; + if (_ntOptions.SymLinks_DangerousLevel <= k_DangLevel_MAX_for_Link_over_Link) + { + NFind::CFileInfo fi; + if (fi.Find(existFilePath) && fi.IsOsSymLink()) + return SendMessageError2(0, k_HardLink_to_SymLink_Ignored, newFilePath, existFilePath); + } + if (!MyCreateHardLink(newFilePath, existFilePath)) + return SendMessageError2_with_LastError(kCantCreateHardLink, newFilePath, existFilePath); + link_was_Created = true; + return S_OK; +} + + + +HRESULT CArchiveExtractCallback::SetLink( + const FString &fullProcessedPath_from, + const CLinkInfo &link, + bool &linkWasSet) // placeholder was created { linkWasSet = false; - if (!_ntOptions.SymLinks.Val && !linkInfo.isHardLink) + if (link.LinkPath.IsEmpty()) return S_OK; - - UString relatPath; - - /* if (linkInfo.isRelative) - linkInfo.linkPath is final link path that must be stored to file link field - else - linkInfo.linkPath is path from root of archive. So we must add _dirPathPrefix_Full before linkPath.
- */ - - if (linkInfo.isRelative) - relatPath = GetDirPrefixOf(_item.Path); - relatPath += linkInfo.linkPath; + if (!_ntOptions.SymLinks.Val && link.Is_AnySymLink()) + return S_OK; + CPostLink postLink; + postLink.Index_in_Arc = _index; + postLink.item_IsDir = _item.IsDir; + postLink.item_Path = _item.Path; + postLink.item_PathParts = _item.PathParts; + postLink.item_FileInfo = _fi; + postLink.fullProcessedPath_from = fullProcessedPath_from; + postLink.LinkInfo = link; + _postLinks.Add(postLink); - if (!IsSafePath(relatPath)) + // file doesn't exist in most cases. So we don't check for error. + DeleteLinkFileAlways_or_RemoveEmptyDir(fullProcessedPath_from, false); // checkThatFileIsEmpty = false + + NIO::COutFile outFile; + if (!outFile.Create_NEW(fullProcessedPath_from)) + return SendMessageError("Cannot create temporary link file", fullProcessedPath_from); +#if 0 // 1 for debug + // here we can write link path to temporary link file placeholder, + // but empty placeholder is better, because we don't want to get any non-empty data instead of link file.
+ AString s; + ConvertUnicodeToUTF8(link.LinkPath, s); + outFile.WriteFull(s, s.Len()); +#endif + linkWasSet = true; + return S_OK; +} + + +// if file/dir is symbolic link it will remove only link itself +HRESULT CArchiveExtractCallback::DeleteLinkFileAlways_or_RemoveEmptyDir( + const FString &path, bool checkThatFileIsEmpty) const +{ + NFile::NFind::CFileInfo fi; + if (fi.Find(path)) // followLink = false { - return SendMessageError2( - 0, // errorCode - "Dangerous link path was ignored", - us2fs(_item.Path), - us2fs(linkInfo.linkPath)); // us2fs(relatPath) + if (fi.IsDir()) + { + if (RemoveDirAlways_if_Empty(path)) + return S_OK; + } + else + { + // link file placeholder must be empty + if (checkThatFileIsEmpty && !fi.IsOsSymLink() && fi.Size != 0) + return SendMessageError("Temporary link file is not empty", path); + if (DeleteFileAlways(path)) + return S_OK; + } + if (GetLastError() != ERROR_FILE_NOT_FOUND) + return SendMessageError_with_LastError( + fi.IsDir() ? + k_CantDelete_Dir_for_SymLink: + k_CantDelete_File_for_SymLink, + path); + } + return S_OK; +} + + +/* +in: + link.LinkPath : must be relative (non-absolute) path in any case !!! + link.isRelative / target path that must be stored as created link: + == false / _dirPathPrefix_Full + link.LinkPath + == true / link.LinkPath +*/ +static HRESULT SetLink2(const CArchiveExtractCallback &callback, + const CPostLink &postLink, bool &linkWasSet) +{ + const CLinkInfo &link = postLink.LinkInfo; + const FString &fullProcessedPath_from = postLink.fullProcessedPath_from; // full file path in FS (fullProcessedPath_from) + + const unsigned level = callback._ntOptions.SymLinks_DangerousLevel; + if (level < 20) + { + /* + We want to use additional check for links that can link to directory. + - linux: all symbolic links are files. + - windows: we can have file/directory symbolic link, + but file symbolic link works like directory link in windows. + So we use additional check for all relative links.
+ + We don't allow decreasing of final level of link. + So if some another extracted file will use this link, + then number of real path parts (after link redirection) cannot be + smaller than number of requested path parts from archive records. + + here we check only (link.LinkPath) without (_item.PathParts). + */ + CLinkLevelsInfo li; + li.Parse(link.LinkPath, link.Is_WSL()); + bool isDang; + UString relativePathPrefix; + if (li.IsAbsolute // unexpected + || li.ParentDirDots_after_NonParent + || (level <= 5 && link.isRelative && li.FinalLevel < 1) // final level lower + || (level <= 5 && link.isRelative && li.LowLevel < 0) // negative temporary levels + ) + isDang = true; + else // if (!isDang) + { + UString path; + if (link.isRelative) + { + // item_PathParts : parts that will be created in output folder. + // we want to get directory prefix of link item. + // so we remove file name (last non-empty part) from PathParts: + UStringVector v = postLink.item_PathParts; + while (!v.IsEmpty()) + { + const unsigned len = v.Back().Len(); + v.DeleteBack(); + if (len) + break; + } + path = MakePathFromParts(v); + NName::NormalizeDirPathPrefix(path); + relativePathPrefix = path; + } + path += link.LinkPath; + /* + path is calculated virtual target path of link + path is relative to root folder of extracted items + if (!link.isRelative), then (path == link.LinkPath) + */ + isDang = false; + if (!IsSafePath(path, link.Is_WSL())) + isDang = true; + } + const char *message = NULL; + if (isDang) + message = "Dangerous link path was ignored"; + else if (level <= k_DangLevel_MAX_for_Link_over_Link + && !CheckLinkPath_in_FS(callback._dirPathPrefix_Full, + postLink, relativePathPrefix)) + message = "Dangerous link via another link was ignored"; + if (message) + return callback.SendMessageError2(0, // errorCode + message, us2fs(postLink.item_Path), us2fs(link.LinkPath)); } - FString existPath; - if (linkInfo.isHardLink /* || linkInfo.IsCopyLink */ || !linkInfo.isRelative) + FString 
target; // target path that will be stored to link field + if (link.Is_HardLink() /* || link.IsCopyLink */ || !link.isRelative) { - if (!NName::GetFullPath(_dirPathPrefix_Full, us2fs(relatPath), existPath)) - { - RINOK(SendMessageError("Incorrect path", us2fs(relatPath))) - } + // isRelative == false + // all hard links and absolute symbolic links + // relatPath == link.LinkPath + // we get absolute link path for target: + if (!NName::GetFullPath(callback._dirPathPrefix_Full, us2fs(link.LinkPath), target)) + return callback.SendMessageError("Incorrect link path", us2fs(link.LinkPath)); + // (target) is (_dirPathPrefix_Full + relatPath) } else { - existPath = us2fs(linkInfo.linkPath); - // printf("\nlinkPath = : %s\n", GetOemString(linkInfo.linkPath).Ptr()); + // link.isRelative == true + // relative symbolic links only + target = us2fs(link.LinkPath); } - - if (existPath.IsEmpty()) - return SendMessageError("Empty link", fullProcessedPath); + if (target.IsEmpty()) + return callback.SendMessageError("Empty link", fullProcessedPath_from); - if (linkInfo.isHardLink /* || linkInfo.IsCopyLink */) + if (link.Is_HardLink() /* || link.IsCopyLink */) { - // if (linkInfo.isHardLink) + // if (link.isHardLink) { - if (!MyCreateHardLink(fullProcessedPath, existPath)) + RINOK(callback.DeleteLinkFileAlways_or_RemoveEmptyDir(fullProcessedPath_from, true)) // checkThatFileIsEmpty { - const HRESULT errorCode = GetLastError_noZero_HRESULT(); - RINOK(SendMessageError2(errorCode, kCantCreateHardLink, fullProcessedPath, existPath)) + // RINOK(SendMessageError_with_LastError(k_Cant_DeleteTempLinkFile, fullProcessedPath_from)) } + return callback.CreateHardLink2(fullProcessedPath_from, target, linkWasSet); /* RINOK(PrepareOperation(NArchive::NExtract::NAskMode::kExtract)) _op_WasReported = true; RINOK(SetOperationResult(NArchive::NExtract::NOperationResult::kOK)) - */ linkWasSet = true; return S_OK; + */ } /* // IsCopyLink { NFind::CFileInfo fi; - if (!fi.Find(existPath)) + if 
(!fi.Find(target)) { - RINOK(SendMessageError2("Cannot find the file for copying", existPath, fullProcessedPath)); + RINOK(SendMessageError2("Cannot find the file for copying", target, fullProcessedPath)); } else { if (_curSize_Defined && _curSize == fi.Size) - _copyFile_Path = existPath; + _copyFile_Path = target; else { - RINOK(SendMessageError2("File size collision for file copying", existPath, fullProcessedPath)); + RINOK(SendMessageError2("File size collision for file copying", target, fullProcessedPath)); } - // RINOK(MyCopyFile(existPath, fullProcessedPath)); + // RINOK(MyCopyFile(target, fullProcessedPath)); } } */ @@ -2111,127 +2281,227 @@ HRESULT CArchiveExtractCallback::SetFromLinkPath( // Windows before Vista doesn't support symbolic links. // we could convert such symbolic links to Junction Points // isJunction = true; - // convertToAbs = true; } */ - if (!_ntOptions.SymLinks_AllowDangerous.Val) - { - #ifdef _WIN32 - if (_item.IsDir) - #endif - if (linkInfo.isRelative) - { - CLinkLevelsInfo levelsInfo; - levelsInfo.Parse(linkInfo.linkPath); - if (levelsInfo.FinalLevel < 1 || levelsInfo.IsAbsolute) - { - return SendMessageError2( - 0, // errorCode - "Dangerous symbolic link path was ignored", - us2fs(_item.Path), - us2fs(linkInfo.linkPath)); - } - } - } +#ifdef _WIN32 + const bool isDir = (postLink.item_IsDir || link.LinkType == k_LinkType_Junction); +#endif - - #ifdef _WIN32 - + +#ifdef _WIN32 CByteBuffer data; - // printf("\nFillLinkData(): %s\n", GetOemString(existPath).Ptr()); - if (!FillLinkData(data, fs2us(existPath), !linkInfo.isJunction, linkInfo.isWSL)) - return SendMessageError("Cannot fill link data", us2fs(_item.Path)); - + // printf("\nFillLinkData(): %s\n", GetOemString(target).Ptr()); + if (link.Is_WSL()) + { + Convert_WinPath_to_WslLinuxPath(target, !link.isRelative); + FillLinkData_WslLink(data, fs2us(target)); + } + else + FillLinkData_WinLink(data, fs2us(target), link.LinkType != k_LinkType_Junction); + if (data.Size() == 0) + return 
callback.SendMessageError("Cannot fill link data", us2fs(postLink.item_Path)); /* if (NtReparse_Size != data.Size() || memcmp(NtReparse_Data, data, data.Size()) != 0) - { - SendMessageError("reconstructed Reparse is different", fs2us(existPath)); - } + SendMessageError("reconstructed Reparse is different", fs2us(target)); */ - - CReparseAttr attr; - if (!attr.Parse(data, data.Size())) { - RINOK(SendMessageError("Internal error for symbolic link file", us2fs(_item.Path))) - return S_OK; + // we check that reparse data is correct, but we ignore attr.MinorError. + CReparseAttr attr; + if (!attr.Parse(data, data.Size())) + return callback.SendMessageError("Internal error for symbolic link file", us2fs(postLink.item_Path)); } - if (!NFile::NIO::SetReparseData(fullProcessedPath, _item.IsDir, data, (DWORD)data.Size())) +#endif + + RINOK(callback.DeleteLinkFileAlways_or_RemoveEmptyDir(fullProcessedPath_from, true)) // checkThatFileIsEmpty +#ifdef _WIN32 + if (!NFile::NIO::SetReparseData(fullProcessedPath_from, isDir, data, (DWORD)data.Size())) +#else // ! _WIN32 + if (!NFile::NIO::SetSymLink(fullProcessedPath_from, target)) +#endif // ! _WIN32 { - RINOK(SendMessageError_with_LastError(kCantCreateSymLink, fullProcessedPath)) - return S_OK; + return callback.SendMessageError_with_LastError(kCantCreateSymLink, fullProcessedPath_from); } linkWasSet = true; - return S_OK; - - - #else // ! _WIN32 - - if (!NFile::NIO::SetSymLink(fullProcessedPath, existPath)) - { - RINOK(SendMessageError_with_LastError(kCantCreateSymLink, fullProcessedPath)) - return S_OK; - } - linkWasSet = true; - - return S_OK; - - #endif // ! 
_WIN32 } -bool CLinkInfo::Parse(const Byte *data, size_t dataSize, bool isLinuxData) -{ - Clear(); - // this->isLinux = isLinuxData; - - if (isLinuxData) - { - isJunction = false; - isHardLink = false; - AString utf; - if (dataSize >= (1 << 12)) - return false; - utf.SetFrom_CalcLen((const char *)data, (unsigned)dataSize); - UString u; - if (!ConvertUTF8ToUnicode(utf, u)) - return false; - linkPath = u; - - // in linux symbolic data: we expect that linux separator '/' is used - // if windows link was created, then we also must use linux separator - if (u.IsEmpty()) - return false; - const wchar_t c = u[0]; - isRelative = !IS_PATH_SEPAR(c); - return true; - } +bool CLinkInfo::Parse_from_WindowsReparseData(const Byte *data, size_t dataSize) +{ CReparseAttr reparse; if (!reparse.Parse(data, dataSize)) return false; - isHardLink = false; - // isCopyLink = false; - linkPath = reparse.GetPath(); - isJunction = reparse.IsMountPoint(); - + // const AString s = GetAnsiString(LinkPath); + // printf("\nlinkPath: %s\n", s.Ptr()); + LinkPath = reparse.GetPath(); if (reparse.IsSymLink_WSL()) { - isWSL = true; - isRelative = reparse.IsRelative_WSL(); + LinkType = k_LinkType_WSL; + isRelative = reparse.IsRelative_WSL(); // detected from LinkPath[0] + // LinkPath is original raw name converted to UString from AString + // Linux separator '/' is expected here. + REPLACE_SLASHES_from_Linux_to_Sys(LinkPath) } else - isRelative = reparse.IsRelative_Win(); - - // FIXME !!! - #ifndef _WIN32 - linkPath.Replace(L'\\', WCHAR_PATH_SEPARATOR); - #endif - + { + LinkType = reparse.IsMountPoint() ? k_LinkType_Junction : k_LinkType_PureSymLink; + isRelative = reparse.IsRelative_Win(); // detected by (Flags == Z7_WIN_SYMLINK_FLAG_RELATIVE) + isWindowsPath = true; + // LinkPath is original windows link path from reparse data with \??\ prefix removed. + // windows '\\' separator is expected here. + // linux '/' separator is not expected here. 
+ // we translate both types of separators to system separator. + LinkPath.Replace( +#if WCHAR_PATH_SEPARATOR == L'\\' + L'/' +#else + L'\\' +#endif + , WCHAR_PATH_SEPARATOR); + } + // (LinkPath) uses system path separator. + // windows: (LinkPath) doesn't contain linux separator (slash). + return true; +} + + +bool CLinkInfo::Parse_from_LinuxData(const Byte *data, size_t dataSize) +{ + // Clear(); // *this object was cleared by constructor already. + LinkType = k_LinkType_PureSymLink; + AString utf; + if (dataSize >= k_LinkDataSize_LIMIT) + return false; + utf.SetFrom_CalcLen((const char *)data, (unsigned)dataSize); + UString u; + if (!ConvertUTF8ToUnicode(utf, u)) + return false; + if (u.IsEmpty()) + return false; + const wchar_t c = u[0]; + isRelative = (c != L'/'); + // linux path separator is expected + REPLACE_SLASHES_from_Linux_to_Sys(u) + LinkPath = u; + // (LinkPath) uses system path separator. + // windows: (LinkPath) doesn't contain linux separator (slash). return true; } + +// in/out: (LinkPath) uses system path separator +// in/out: windows: (LinkPath) doesn't contain linux separator (slash). +// out: (LinkPath) is relative path, and LinkPath[0] is not path separator +// out: isRelative changed to false, if any prefix was removed. +// note: absolute windows links "c:\" to root will be reduced to empty string: +void CLinkInfo::Remove_AbsPathPrefixes() +{ + while (!LinkPath.IsEmpty()) + { + unsigned n = 0; + if (!Is_WSL()) + { + n = +#ifndef _WIN32 + isWindowsPath ? 
+ NName::GetRootPrefixSize_WINDOWS(LinkPath) : +#endif + NName::GetRootPrefixSize(LinkPath); +/* + // "c:path" will be ignored later as "Dangerous absolute path" + // so check is not required + if (n == 0 +#ifndef _WIN32 + && isWindowsPath +#endif + && NName::IsDrivePath2(LinkPath)) + n = 2; +*/ + } + if (n == 0) + { + if (!IS_PATH_SEPAR(LinkPath[0])) + break; + n = 1; + } + isRelative = false; // (LinkPath) will be treated as relative to root folder of archive + LinkPath.DeleteFrontal(n); + } +} + + +/* + it removes redundant separators, if there are double separators, + but it keeps double separators at start of string //name/. + in/out: system path separator is used + windows: slash character (linux separator) is not treated as separator + windows: (path) doesn't contain linux separator (slash). +*/ +static void RemoveRedundantPathSeparators(UString &path) +{ + wchar_t *dest = path.GetBuf(); + const wchar_t * const start = dest; + const wchar_t *src = dest; + for (;;) + { + wchar_t c = *src++; + if (c == 0) + break; + // if (IS_PATH_SEPAR(c)) // for Windows: we can change (/) to (\). + if (c == WCHAR_PATH_SEPARATOR) + { + if (dest - start >= 2 && dest[-1] == WCHAR_PATH_SEPARATOR) + continue; + // c = WCHAR_PATH_SEPARATOR; // for Windows: we can change (/) to (\). + } + *dest++ = c; + } + *dest = 0; + path.ReleaseBuf_SetLen((unsigned)(dest - path.Ptr())); +} + + +// in/out: (LinkPath) uses system path separator +// in/out: windows: (LinkPath) doesn't contain linux separator (slash). +// out: (LinkPath) is relative path, and LinkPath[0] is not path separator +void CLinkInfo::Normalize_to_RelativeSafe(UStringVector &removePathParts) +{ + // We WILL NOT WRITE original absolute link path from archive to filesystem. + // So here we remove all root prefixes from (LinkPath). 
+ // If we see any absolute root prefix, then we suppose that this prefix is virtual prefix + // that shows that link is relative to root folder of archive + RemoveRedundantPathSeparators(LinkPath); + // LinkPath = "\\\\?\\r:test\\test2"; // for debug + Remove_AbsPathPrefixes(); + // (LinkPath) now is relative: + // if (isRelative == false), then (LinkPath) is relative to root folder of archive + // if (isRelative == true ), then (LinkPath) is relative to current item + if (LinkPath.IsEmpty() || isRelative || removePathParts.Size() == 0) + return; + + // if LinkPath is prefixed by _removePathParts, we remove these paths + UStringVector pathParts; + SplitPathToParts(LinkPath, pathParts); + bool badPrefix = false; + { + FOR_VECTOR (i, removePathParts) + { + if (i >= pathParts.Size() + || CompareFileNames(removePathParts[i], pathParts[i]) != 0) + { + badPrefix = true; + break; + } + } + } + if (!badPrefix) + pathParts.DeleteFrontal(removePathParts.Size()); + LinkPath = MakePathFromParts(pathParts); + Remove_AbsPathPrefixes(); +} + #endif // SUPPORT_LINKS @@ -2239,12 +2509,12 @@ HRESULT CArchiveExtractCallback::CloseReparseAndFile() { HRESULT res = S_OK; - #ifdef SUPPORT_LINKS +#ifdef SUPPORT_LINKS size_t reparseSize = 0; bool repraseMode = false; bool needSetReparse = false; - CLinkInfo linkInfo; + CLinkInfo link; if (_bufPtrSeqOutStream) { @@ -2258,15 +2528,19 @@ HRESULT CArchiveExtractCallback::CloseReparseAndFile() needSetReparse = reparse.Parse(_outMemBuf, reparseSize, errorCode); if (needSetReparse) { - UString linkPath = reparse.GetPath(); + UString LinkPath = reparse.GetPath(); #ifndef _WIN32 - linkPath.Replace(L'\\', WCHAR_PATH_SEPARATOR); + LinkPath.Replace(L'\\', WCHAR_PATH_SEPARATOR); #endif } */ - needSetReparse = linkInfo.Parse(_outMemBuf, reparseSize, _is_SymLink_in_Data_Linux); + needSetReparse = _is_SymLink_in_Data_Linux ? 
+ link.Parse_from_LinuxData(_outMemBuf, reparseSize) : + link.Parse_from_WindowsReparseData(_outMemBuf, reparseSize); if (!needSetReparse) res = SendMessageError_with_LastError("Incorrect reparse stream", us2fs(_item.Path)); + // (link.LinkPath) uses system path separator. + // windows: (link.LinkPath) doesn't contain linux separator (slash). } else { @@ -2281,25 +2555,21 @@ HRESULT CArchiveExtractCallback::CloseReparseAndFile() _bufPtrSeqOutStream.Release(); } - #endif // SUPPORT_LINKS - +#endif // SUPPORT_LINKS const HRESULT res2 = CloseFile(); - if (res == S_OK) res = res2; - RINOK(res) - #ifdef SUPPORT_LINKS +#ifdef SUPPORT_LINKS if (repraseMode) { _curSize = reparseSize; _curSize_Defined = true; - - #ifdef SUPPORT_LINKS if (needSetReparse) { + // empty file was created so we must delete it. // in Linux : we must delete empty file before symbolic link creation // in Windows : we can create symbolic link even without file deleting if (!DeleteFileAlways(_diskFilePath)) @@ -2307,42 +2577,57 @@ HRESULT CArchiveExtractCallback::CloseReparseAndFile() RINOK(SendMessageError_with_LastError("can't delete file", _diskFilePath)) } { - /* - // for DEBUG ONLY: we can extract sym links as WSL links - // to eliminate (non-admin) errors for sym links. - #ifdef _WIN32 - if (!linkInfo.isHardLink && !linkInfo.isJunction) - linkInfo.isWSL = true; - #endif - */ bool linkWasSet = false; - RINOK(SetFromLinkPath(_diskFilePath, linkInfo, linkWasSet)) + // link.LinkPath = "r:\\1\\2"; // for debug + // link.isJunction = true; // for debug + link.Normalize_to_RelativeSafe(_removePathParts); + RINOK(SetLink(_diskFilePath, link, linkWasSet)) +/* + // we don't set attributes for placeholder. 
if (linkWasSet) - _isSymLinkCreated = linkInfo.IsSymLink(); + _isSymLinkCreated = true; // link.IsSymLink(); else +*/ _needSetAttrib = false; } - /* - if (!NFile::NIO::SetReparseData(_diskFilePath, _item.IsDir, )) - { - res = SendMessageError_with_LastError(kCantCreateSymLink, _diskFilePath); - } - */ } - #endif } - #endif +#endif // SUPPORT_LINKS return res; } -void CArchiveExtractCallback::SetAttrib() +static void SetAttrib_Base(const FString &path, const CProcessedFileInfo &fi, + const CArchiveExtractCallback &callback) { - #ifndef _WIN32 +#ifndef _WIN32 + if (fi.Owner.Id_Defined && + fi.Group.Id_Defined) + { + if (my_chown(path, fi.Owner.Id, fi.Group.Id) != 0) + callback.SendMessageError_with_LastError("Cannot set owner", path); + } +#endif + + if (fi.Attrib_Defined) + { + // const AString s = GetAnsiString(_diskFilePath); + // printf("\nSetFileAttrib_PosixHighDetect: %s: hex:%x\n", s.Ptr(), _fi.Attrib); + if (!SetFileAttrib_PosixHighDetect(path, fi.Attrib)) + { + // do we need error message here in Windows and in posix? + callback.SendMessageError_with_LastError("Cannot set file attribute", path); + } + } +} + +void CArchiveExtractCallback::SetAttrib() const +{ +#ifndef _WIN32 // Linux now doesn't support permissions for symlinks if (_isSymLinkCreated) return; - #endif +#endif if (_itemFailure || _diskFilePath.IsEmpty() @@ -2350,31 +2635,41 @@ void CArchiveExtractCallback::SetAttrib() || !_extractMode) return; - #ifndef _WIN32 - if (_fi.Owner.Id_Defined && - _fi.Group.Id_Defined) - { - if (my_chown(_diskFilePath, _fi.Owner.Id, _fi.Group.Id) != 0) - { - SendMessageError_with_LastError("Cannot set owner", _diskFilePath); - } - } - #endif - - if (_fi.Attrib_Defined) - { - // const AString s = GetAnsiString(_diskFilePath); - // printf("\nSetFileAttrib_PosixHighDetect: %s: hex:%x\n", s.Ptr(), _fi.Attrib); - bool res = SetFileAttrib_PosixHighDetect(_diskFilePath, _fi.Attrib); - if (!res) - { - // do we need error message here in Windows and in posix? 
- SendMessageError_with_LastError("Cannot set file attribute", _diskFilePath); - } - } + SetAttrib_Base(_diskFilePath, _fi, *this); } +#ifdef Z7_USE_SECURITY_CODE +HRESULT CArchiveExtractCallback::SetSecurityInfo(UInt32 indexInArc, const FString &path) const +{ + if (!_stdOutMode && _extractMode && _ntOptions.NtSecurity.Val && _arc->GetRawProps) + { + const void *data; + UInt32 dataSize; + UInt32 propType; + _arc->GetRawProps->GetRawProp(indexInArc, kpidNtSecure, &data, &dataSize, &propType); + if (dataSize != 0) + { + if (propType != NPropDataType::kRaw) + return E_FAIL; + if (CheckNtSecure((const Byte *)data, dataSize)) + { + SECURITY_INFORMATION securInfo = DACL_SECURITY_INFORMATION | GROUP_SECURITY_INFORMATION | OWNER_SECURITY_INFORMATION; + if (_saclEnabled) + securInfo |= SACL_SECURITY_INFORMATION; + // if (! + ::SetFileSecurityW(fs2us(path), securInfo, (PSECURITY_DESCRIPTOR)(void *)(const Byte *)(data)); + { + // RINOK(SendMessageError_with_LastError("SetFileSecurity FAILS", path)) + } + } + } + } + return S_OK; +} +#endif // Z7_USE_SECURITY_CODE + + Z7_COM7F_IMF(CArchiveExtractCallback::SetOperationResult(Int32 opRes)) { COM_TRY_BEGIN @@ -2410,27 +2705,9 @@ Z7_COM7F_IMF(CArchiveExtractCallback::SetOperationResult(Int32 opRes)) RINOK(CloseReparseAndFile()) - #ifdef Z7_USE_SECURITY_CODE - if (!_stdOutMode && _extractMode && _ntOptions.NtSecurity.Val && _arc->GetRawProps) - { - const void *data; - UInt32 dataSize; - UInt32 propType; - _arc->GetRawProps->GetRawProp(_index, kpidNtSecure, &data, &dataSize, &propType); - if (dataSize != 0) - { - if (propType != NPropDataType::kRaw) - return E_FAIL; - if (CheckNtSecure((const Byte *)data, dataSize)) - { - SECURITY_INFORMATION securInfo = DACL_SECURITY_INFORMATION | GROUP_SECURITY_INFORMATION | OWNER_SECURITY_INFORMATION; - if (_saclEnabled) - securInfo |= SACL_SECURITY_INFORMATION; - ::SetFileSecurityW(fs2us(_diskFilePath), securInfo, (PSECURITY_DESCRIPTOR)(void *)(const Byte *)(data)); - } - } - } - #endif // 
Z7_USE_SECURITY_CODE +#ifdef Z7_USE_SECURITY_CODE + RINOK(SetSecurityInfo(_index, _diskFilePath)) +#endif if (!_curSize_Defined) GetUnpackSize(); @@ -2674,15 +2951,58 @@ void CDirPathSortPair::SetNumSlashes(const FChar *s) } -bool CDirPathTime::SetDirTime() const +bool CFiTimesCAM::SetDirTime_to_FS(CFSTR path) const { - return NDir::SetDirTime(Path, + // it's same function for dir and for file + return NDir::SetDirTime(path, CTime_Defined ? &CTime : NULL, ATime_Defined ? &ATime : NULL, MTime_Defined ? &MTime : NULL); } +#ifdef SUPPORT_LINKS + +bool CFiTimesCAM::SetLinkFileTime_to_FS(CFSTR path) const +{ + // it's same function for dir and for file + return NDir::SetLinkFileTime(path, + CTime_Defined ? &CTime : NULL, + ATime_Defined ? &ATime : NULL, + MTime_Defined ? &MTime : NULL); +} + +HRESULT CArchiveExtractCallback::SetPostLinks() const +{ + FOR_VECTOR (i, _postLinks) + { + const CPostLink &link = _postLinks[i]; + bool linkWasSet = false; + RINOK(SetLink2(*this, link, linkWasSet)) + if (linkWasSet) + { +#ifdef _WIN32 + // Linux now doesn't support permissions for symlinks + SetAttrib_Base(link.fullProcessedPath_from, link.item_FileInfo, *this); +#endif + + CFiTimesCAM pt; + GetFiTimesCAM(link.item_FileInfo, pt, *_arc); + if (pt.IsSomeTimeDefined()) + pt.SetLinkFileTime_to_FS(link.fullProcessedPath_from); + +#ifdef Z7_USE_SECURITY_CODE + // we set security information after timestamps setting + RINOK(SetSecurityInfo(link.Index_in_Arc, link.fullProcessedPath_from)) +#endif + } + } + return S_OK; +} + +#endif + + HRESULT CArchiveExtractCallback::SetDirsTimes() { if (!_arc) @@ -2706,7 +3026,7 @@ HRESULT CArchiveExtractCallback::SetDirsTimes() for (i = 0; i < pairs.Size(); i++) { const CDirPathTime &dpt = _extractedFolders[pairs[i].Index]; - if (!dpt.SetDirTime()) + if (!dpt.SetDirTime_to_FS_2()) { // result = E_FAIL; // do we need error message here in Windows and in posix? 
@@ -2738,10 +3058,20 @@ HRESULT CArchiveExtractCallback::SetDirsTimes() HRESULT CArchiveExtractCallback::CloseArc() { + // we call CloseReparseAndFile() here because we can have non-closed file in some cases? HRESULT res = CloseReparseAndFile(); - const HRESULT res2 = SetDirsTimes(); - if (res == S_OK) - res = res2; +#ifdef SUPPORT_LINKS + { + const HRESULT res2 = SetPostLinks(); + if (res == S_OK) + res = res2; + } +#endif + { + const HRESULT res2 = SetDirsTimes(); + if (res == S_OK) + res = res2; + } _arc = NULL; return res; } diff --git a/CPP/7zip/UI/Common/ArchiveExtractCallback.h b/CPP/7zip/UI/Common/ArchiveExtractCallback.h index 0988af2..da15d2e 100644 --- a/CPP/7zip/UI/Common/ArchiveExtractCallback.h +++ b/CPP/7zip/UI/Common/ArchiveExtractCallback.h @@ -52,7 +52,6 @@ struct CExtractNtOptions { CBoolPair NtSecurity; CBoolPair SymLinks; - CBoolPair SymLinks_AllowDangerous; CBoolPair HardLinks; CBoolPair AltStreams; bool ReplaceColonForAltStream; @@ -66,6 +65,8 @@ struct CExtractNtOptions bool PreserveATime; bool OpenShareForWrite; + unsigned SymLinks_DangerousLevel; + UInt64 MemLimit; CExtractNtOptions(): @@ -74,10 +75,10 @@ struct CExtractNtOptions ExtractOwner(false), PreserveATime(false), OpenShareForWrite(false), + SymLinks_DangerousLevel(5), MemLimit((UInt64)(Int64)-1) { SymLinks.Val = true; - SymLinks_AllowDangerous.Val = false; HardLinks.Val = true; AltStreams.Val = true; @@ -166,53 +167,79 @@ struct CFiTimesCAM ATime_Defined | MTime_Defined; } + bool SetDirTime_to_FS(CFSTR path) const; +#ifdef SUPPORT_LINKS + bool SetLinkFileTime_to_FS(CFSTR path) const; +#endif }; struct CDirPathTime: public CFiTimesCAM { FString Path; - bool SetDirTime() const; + bool SetDirTime_to_FS_2() const { return SetDirTime_to_FS(Path); } }; #ifdef SUPPORT_LINKS +enum ELinkType +{ + k_LinkType_HardLink, + k_LinkType_PureSymLink, + k_LinkType_Junction, + k_LinkType_WSL + // , k_LinkType_CopyLink; +}; + + struct CLinkInfo { - // bool isCopyLink; - bool isHardLink; - bool 
isJunction; + ELinkType LinkType; bool isRelative; - bool isWSL; - UString linkPath; + // if (isRelative == false), then (LinkPath) is relative to root folder of archive + // if (isRelative == true ), then (LinkPath) is relative to current item + bool isWindowsPath; + UString LinkPath; - bool IsSymLink() const { return !isHardLink; } + bool Is_HardLink() const { return LinkType == k_LinkType_HardLink; } + bool Is_AnySymLink() const { return LinkType != k_LinkType_HardLink; } + + bool Is_WSL() const { return LinkType == k_LinkType_WSL; } CLinkInfo(): - // IsCopyLink(false), - isHardLink(false), - isJunction(false), + LinkType(k_LinkType_PureSymLink), isRelative(false), - isWSL(false) + isWindowsPath(false) {} void Clear() { - // IsCopyLink = false; - isHardLink = false; - isJunction = false; + LinkType = k_LinkType_PureSymLink; isRelative = false; - isWSL = false; - linkPath.Empty(); + isWindowsPath = false; + LinkPath.Empty(); } - bool Parse(const Byte *data, size_t dataSize, bool isLinuxData); + bool Parse_from_WindowsReparseData(const Byte *data, size_t dataSize); + bool Parse_from_LinuxData(const Byte *data, size_t dataSize); + void Normalize_to_RelativeSafe(UStringVector &removePathParts); +private: + void Remove_AbsPathPrefixes(); }; #endif // SUPPORT_LINKS + +struct CProcessedFileInfo +{ + CArcTime CTime; + CArcTime ATime; + CArcTime MTime; + UInt32 Attrib; + bool Attrib_Defined; + #ifndef _WIN32 struct COwnerInfo @@ -229,8 +256,76 @@ struct COwnerInfo } }; + COwnerInfo Owner; + COwnerInfo Group; #endif + void Clear() + { +#ifndef _WIN32 + Attrib_Defined = false; + Owner.Clear(); +#endif + } + + bool IsReparse() const + { + return (Attrib_Defined && (Attrib & FILE_ATTRIBUTE_REPARSE_POINT) != 0); + } + + bool IsLinuxSymLink() const + { + return (Attrib_Defined && MY_LIN_S_ISLNK(Attrib >> 16)); + } + + void SetFromPosixAttrib(UInt32 a) + { + // here we set only part of combined attribute required by SetFileAttrib() call + #ifdef _WIN32 + // Windows sets 
FILE_ATTRIBUTE_NORMAL, if we try to set 0 as attribute. + Attrib = MY_LIN_S_ISDIR(a) ? + FILE_ATTRIBUTE_DIRECTORY : + FILE_ATTRIBUTE_ARCHIVE; + if ((a & 0222) == 0) // (& S_IWUSR) in p7zip + Attrib |= FILE_ATTRIBUTE_READONLY; + // 22.00 : we need type bits for (MY_LIN_S_IFLNK) for IsLinuxSymLink() + a &= MY_LIN_S_IFMT; + if (a == MY_LIN_S_IFLNK) + Attrib |= (a << 16); + #else + Attrib = (a << 16) | FILE_ATTRIBUTE_UNIX_EXTENSION; + #endif + Attrib_Defined = true; + } +}; + + +#ifdef SUPPORT_LINKS + +struct CPostLink +{ + UInt32 Index_in_Arc; + bool item_IsDir; // _item.IsDir + UString item_Path; // _item.Path; + UStringVector item_PathParts; // _item.PathParts; + CProcessedFileInfo item_FileInfo; // _fi + FString fullProcessedPath_from; // full file path in FS + CLinkInfo LinkInfo; +}; + +/* +struct CPostLinks +{ + void Clear() + { + Links.Clear(); + } +}; +*/ + +#endif // SUPPORT_LINKS + + class CArchiveExtractCallback Z7_final: public IArchiveExtractCallback, @@ -278,8 +373,9 @@ public: private: const CArc *_arc; +public: CExtractNtOptions _ntOptions; - +private: bool _encrypted; bool _isSplit; bool _curSize_Defined; @@ -287,8 +383,8 @@ private: bool _isRenamed; bool _extractMode; - // bool _is_SymLink_in_Data; - bool _is_SymLink_in_Data_Linux; // false = WIN32, true = LINUX + bool _is_SymLink_in_Data_Linux; // false = WIN32, true = LINUX. + // _is_SymLink_in_Data_Linux is detected from Windows/Linux part of attributes of file. 
bool _needSetAttrib; bool _isSymLinkCreated; bool _itemFailure; @@ -311,7 +407,9 @@ private: CMyComPtr _cryptoGetTextPassword; FString _dirPathPrefix; +public: FString _dirPathPrefix_Full; +private: #ifndef Z7_SFX @@ -323,49 +421,7 @@ private: CReadArcItem _item; FString _diskFilePath; - struct CProcessedFileInfo - { - CArcTime CTime; - CArcTime ATime; - CArcTime MTime; - UInt32 Attrib; - bool Attrib_Defined; - - #ifndef _WIN32 - COwnerInfo Owner; - COwnerInfo Group; - #endif - - bool IsReparse() const - { - return (Attrib_Defined && (Attrib & FILE_ATTRIBUTE_REPARSE_POINT) != 0); - } - - bool IsLinuxSymLink() const - { - return (Attrib_Defined && MY_LIN_S_ISLNK(Attrib >> 16)); - } - - void SetFromPosixAttrib(UInt32 a) - { - // here we set only part of combined attribute required by SetFileAttrib() call - #ifdef _WIN32 - // Windows sets FILE_ATTRIBUTE_NORMAL, if we try to set 0 as attribute. - Attrib = MY_LIN_S_ISDIR(a) ? - FILE_ATTRIBUTE_DIRECTORY : - FILE_ATTRIBUTE_ARCHIVE; - if ((a & 0222) == 0) // (& S_IWUSR) in p7zip - Attrib |= FILE_ATTRIBUTE_READONLY; - // 22.00 : we need type bits for (MY_LIN_S_IFLNK) for IsLinuxSymLink() - a &= MY_LIN_S_IFMT; - if (a == MY_LIN_S_IFLNK) - Attrib |= (a << 16); - #else - Attrib = (a << 16) | FILE_ATTRIBUTE_UNIX_EXTENSION; - #endif - Attrib_Defined = true; - } - } _fi; + CProcessedFileInfo _fi; UInt64 _position; UInt64 _curSize; @@ -407,19 +463,21 @@ private: // CObjectVector _delayedSymLinks; #endif - void CreateComplexDirectory(const UStringVector &dirPathParts, FString &fullPath); + void CreateComplexDirectory( + const UStringVector &dirPathParts, bool isFinal, FString &fullPath); HRESULT GetTime(UInt32 index, PROPID propID, CArcTime &ft); HRESULT GetUnpackSize(); FString Hash_GetFullFilePath(); - void SetAttrib(); + void SetAttrib() const; public: - HRESULT SendMessageError(const char *message, const FString &path); - HRESULT SendMessageError_with_Error(HRESULT errorCode, const char *message, const FString &path); - HRESULT 
SendMessageError_with_LastError(const char *message, const FString &path); - HRESULT SendMessageError2(HRESULT errorCode, const char *message, const FString &path1, const FString &path2); + HRESULT SendMessageError(const char *message, const FString &path) const; + HRESULT SendMessageError_with_Error(HRESULT errorCode, const char *message, const FString &path) const; + HRESULT SendMessageError_with_LastError(const char *message, const FString &path) const; + HRESULT SendMessageError2(HRESULT errorCode, const char *message, const FString &path1, const FString &path2) const; + HRESULT SendMessageError2_with_LastError(const char *message, const FString &path1, const FString &path2) const; #if defined(_WIN32) && !defined(UNDER_CE) && !defined(Z7_SFX) NExtract::NZoneIdMode::EEnum ZoneMode; @@ -482,23 +540,32 @@ public: UInt64 packSize); - #ifdef SUPPORT_LINKS +#ifdef SUPPORT_LINKS private: CHardLinks _hardLinks; + CObjectVector _postLinks; CLinkInfo _link; + // const void *NtReparse_Data; + // UInt32 NtReparse_Size; // FString _copyFile_Path; // HRESULT MyCopyFile(ISequentialOutStream *outStream); - HRESULT Link(const FString &fullProcessedPath); HRESULT ReadLink(); + HRESULT SetLink( + const FString &fullProcessedPath_from, + const CLinkInfo &linkInfo, + bool &linkWasSet); + HRESULT SetPostLinks() const; public: - // call PrepareHardLinks() after Init() + HRESULT CreateHardLink2(const FString &newFilePath, + const FString &existFilePath, bool &link_was_Created) const; + HRESULT DeleteLinkFileAlways_or_RemoveEmptyDir(const FString &path, bool checkThatFileIsEmpty) const; HRESULT PrepareHardLinks(const CRecordVector *realIndices); // NULL means all items +#endif - #endif - +private: #ifdef SUPPORT_ALT_STREAMS CObjectVector _renamedFiles; @@ -506,6 +573,7 @@ public: // call it after Init() +public: #ifndef Z7_SFX void SetBaseParentFolderIndex(UInt32 indexInArc) { @@ -527,7 +595,6 @@ private: HRESULT Read_fi_Props(); void CorrectPathParts(); - void 
GetFiTimesCAM(CFiTimesCAM &pt); void CreateFolders(); HRESULT CheckExistFile(FString &fullProcessedPath, bool &needExit); @@ -536,18 +603,8 @@ private: HRESULT CloseFile(); HRESULT CloseReparseAndFile(); - HRESULT CloseReparseAndFile2(); HRESULT SetDirsTimes(); - - const void *NtReparse_Data; - UInt32 NtReparse_Size; - - #ifdef SUPPORT_LINKS - HRESULT SetFromLinkPath( - const FString &fullProcessedPath, - const CLinkInfo &linkInfo, - bool &linkWasSet); - #endif + HRESULT SetSecurityInfo(UInt32 indexInArc, const FString &path) const; }; diff --git a/CPP/7zip/UI/Common/Bench.cpp b/CPP/7zip/UI/Common/Bench.cpp index b3a8b83..deacf4a 100644 --- a/CPP/7zip/UI/Common/Bench.cpp +++ b/CPP/7zip/UI/Common/Bench.cpp @@ -871,14 +871,27 @@ struct CAffinityMode unsigned NumCoreThreads; unsigned NumCores; // unsigned DivideNum; + +#ifdef _WIN32 + unsigned NumGroups; +#endif + UInt32 Sizes[NUM_CPU_LEVELS_MAX]; void SetLevels(unsigned numCores, unsigned numCoreThreads); DWORD_PTR GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const; bool NeedAffinity() const { return NumBundleThreads != 0; } +#ifdef _WIN32 + bool NeedGroupsMode() const { return NumGroups > 1; } +#endif + WRes CreateThread_WithAffinity(NWindows::CThread &thread, THREAD_FUNC_TYPE startAddress, LPVOID parameter, UInt32 bundleIndex) const { +#ifdef _WIN32 + if (NeedGroupsMode()) // we need fix for bundleIndex usage + return thread.Create_With_Group(startAddress, parameter, bundleIndex % NumGroups); +#endif if (NeedAffinity()) { CCpuSet cpuSet; @@ -892,6 +905,9 @@ struct CAffinityMode NumBundleThreads(0), NumLevels(0), NumCoreThreads(1) +#ifdef _WIN32 + , NumGroups(0) +#endif // DivideNum(1) {} }; @@ -1288,22 +1304,28 @@ HRESULT CEncoderInfo::Generate() if (scp) { const UInt64 reduceSize = kBufferSize; - - /* in posix new thread uses same affinity as parent thread, + /* in posix : new thread uses same affinity as parent thread, so we don't need to send affinity to coder in posix */ - UInt64 affMask; - #if 
!defined(Z7_ST) && defined(_WIN32) + UInt64 affMask = 0; + UInt32 affinityGroup = (UInt32)(Int32)-1; + // UInt64 affinityInGroup = 0; +#if !defined(Z7_ST) && defined(_WIN32) { CCpuSet cpuSet; - affMask = AffinityMode.GetAffinityMask(EncoderIndex, &cpuSet); + if (AffinityMode.NeedGroupsMode()) // we need fix for affinityInGroup also + affinityGroup = EncoderIndex % AffinityMode.NumGroups; + else + affMask = AffinityMode.GetAffinityMask(EncoderIndex, &cpuSet); } - #else - affMask = 0; - #endif - // affMask <<= 3; // debug line: to test no affinity in coder; - // affMask = 0; - - RINOK(method.SetCoderProps_DSReduce_Aff(scp, &reduceSize, (affMask != 0 ? &affMask : NULL))) +#endif + // affMask <<= 3; // debug line: to test no affinity in coder + // affMask = 0; // for debug + // affinityGroup = 0; // for debug + // affinityInGroup = 1; // for debug + RINOK(method.SetCoderProps_DSReduce_Aff(scp, &reduceSize, + affMask != 0 ? &affMask : NULL, + affinityGroup != (UInt32)(Int32)-1 ? &affinityGroup : NULL, + /* affinityInGroup != 0 ? 
&affinityInGroup : */ NULL)) } else { @@ -2962,7 +2984,7 @@ AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti) { AString s; // s.Add_UInt32(ti.numProcessThreads); - unsigned numSysThreads = ti.GetNumSystemThreads(); + const unsigned numSysThreads = ti.GetNumSystemThreads(); if (ti.GetNumProcessThreads() != numSysThreads) { // if (ti.numProcessThreads != ti.numSysThreads) @@ -2992,6 +3014,35 @@ AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti) } #endif } +#ifdef _WIN32 + if (ti.Groups.GroupSizes.Size() > 1 || + (ti.Groups.GroupSizes.Size() == 1 + && ti.Groups.NumThreadsTotal != numSysThreads)) + { + s += " : "; + s.Add_UInt32(ti.Groups.GroupSizes.Size()); + s += " groups : "; + if (ti.Groups.NumThreadsTotal == numSysThreads) + { + s.Add_UInt32(ti.Groups.NumThreadsTotal); + s += " c : "; + } + UInt32 minSize, maxSize; + ti.Groups.Get_GroupSize_Min_Max(minSize, maxSize); + if (minSize == maxSize) + { + s.Add_UInt32(ti.Groups.GroupSizes[0]); + s += " c/g"; + } + else + FOR_VECTOR (i, ti.Groups.GroupSizes) + { + if (i != 0) + s.Add_Char(' '); + s.Add_UInt32(ti.Groups.GroupSizes[i]); + } + } +#endif return s; } @@ -3753,9 +3804,13 @@ HRESULT Bench( UInt64 complexInCommands = kComplexInCommands; UInt32 numThreads_Start = 1; - #ifndef Z7_ST +#ifndef Z7_ST CAffinityMode affinityMode; - #endif +#ifdef _WIN32 + if (threadsInfo.IsGroupMode && threadsInfo.Groups.GroupSizes.Size() > 1) + affinityMode.NumGroups = threadsInfo.Groups.GroupSizes.Size(); +#endif +#endif COneMethodInfo method; @@ -4861,7 +4916,7 @@ HRESULT Bench( if (AreSameMethodNames(benchMethod, methodName)) { if (benchProps.IsEmpty() - || (benchProps == "x5" && method.PropsString.IsEmpty()) + || (benchProps.IsEqualTo("x5") && method.PropsString.IsEmpty()) || method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps)) { callback.BenchProps.EncComplex = h.EncComplex; diff --git a/CPP/7zip/UI/Common/EnumDirItems.cpp b/CPP/7zip/UI/Common/EnumDirItems.cpp index 8b105ae..3049a91 100644 --- 
a/CPP/7zip/UI/Common/EnumDirItems.cpp +++ b/CPP/7zip/UI/Common/EnumDirItems.cpp @@ -1213,11 +1213,13 @@ HRESULT CDirItems::FillFixedReparse() // continue; // for debug if (!item.Has_Attrib_ReparsePoint()) continue; - + /* + We want to get properties of target file instead of properties of symbolic link. + Probably this code is unused, because + CFileInfo::Find(with followLink = true) called Fill_From_ByHandleFileInfo() already. + */ // if (item.IsDir()) continue; - const FString phyPath = GetPhyPath(i); - NFind::CFileInfo fi; if (fi.Fill_From_ByHandleFileInfo(phyPath)) // item.IsDir() { @@ -1228,38 +1230,13 @@ HRESULT CDirItems::FillFixedReparse() item.Attrib = fi.Attrib; continue; } - - /* - // we request properties of target file instead of properies of symbolic link - // here we also can manually parse unsupported links (like WSL links) - NIO::CInFile inFile; - if (inFile.Open(phyPath)) - { - BY_HANDLE_FILE_INFORMATION info; - if (inFile.GetFileInformation(&info)) - { - // Stat.FilesSize doesn't contain item.Size already - // Stat.FilesSize -= item.Size; - item.Size = (((UInt64)info.nFileSizeHigh) << 32) + info.nFileSizeLow; - Stat.FilesSize += item.Size; - item.CTime = info.ftCreationTime; - item.ATime = info.ftLastAccessTime; - item.MTime = info.ftLastWriteTime; - item.Attrib = info.dwFileAttributes; - continue; - } - } - */ - RINOK(AddError(phyPath)) continue; } - // (SymLinks == true) here - + // (SymLinks == true) if (item.ReparseData.Size() == 0) continue; - // if (item.Size == 0) { // 20.03: we use Reparse Data instead of real data @@ -1277,7 +1254,7 @@ HRESULT CDirItems::FillFixedReparse() /* imagex/WIM reduces absolute paths in links (raparse data), if we archive non root folder. 
We do same thing here */ - bool isWSL = false; + // bool isWSL = false; if (attr.IsSymLink_WSL()) { // isWSL = true; @@ -1314,21 +1291,27 @@ HRESULT CDirItems::FillFixedReparse() continue; if (rootPrefixSize == prefix.Len()) continue; // simple case: paths are from root - if (link.Len() <= prefix.Len()) continue; - if (CompareFileNames(link.Left(prefix.Len()), prefix) != 0) continue; UString newLink = prefix.Left(rootPrefixSize); newLink += link.Ptr(prefix.Len()); - CByteBuffer data; - bool isSymLink = !attr.IsMountPoint(); - if (!FillLinkData(data, newLink, isSymLink, isWSL)) + CByteBuffer &data = item.ReparseData2; +/* + if (isWSL) + { + Convert_WinPath_to_WslLinuxPath(newLink, true); // is absolute : change it + FillLinkData_WslLink(data, newLink); + } + else +*/ + FillLinkData_WinLink(data, newLink, !attr.IsMountPoint()); + if (data.Size() == 0) continue; - item.ReparseData2 = data; + // item.ReparseData2 = data; } return S_OK; } diff --git a/CPP/7zip/UI/Common/Extract.cpp b/CPP/7zip/UI/Common/Extract.cpp index 325a08c..7d733a3 100644 --- a/CPP/7zip/UI/Common/Extract.cpp +++ b/CPP/7zip/UI/Common/Extract.cpp @@ -389,7 +389,7 @@ HRESULT Extract( { UString s = arcPath.Ptr(pos + 1); int index = codecs->FindFormatForExtension(s); - if (index >= 0 && s == L"001") + if (index >= 0 && s.IsEqualTo("001")) { s = arcPath.Left(pos); pos = s.ReverseFind(L'.'); diff --git a/CPP/7zip/UI/Common/ExtractingFilePath.cpp b/CPP/7zip/UI/Common/ExtractingFilePath.cpp index 57dd9c0..91753af 100644 --- a/CPP/7zip/UI/Common/ExtractingFilePath.cpp +++ b/CPP/7zip/UI/Common/ExtractingFilePath.cpp @@ -208,7 +208,7 @@ void Correct_FsPath(bool absIsAllowed, bool keepAndReplaceEmptyPrefixes, UString if (parts.Size() > 1 && parts[1].IsEmpty()) { i = 2; - if (parts.Size() > 2 && parts[2] == L"?") + if (parts.Size() > 2 && parts[2].IsEqualTo("?")) { i = 3; if (parts.Size() > 3 && NWindows::NFile::NName::IsDrivePath2(parts[3])) diff --git a/CPP/7zip/UI/Common/HashCalc.cpp 
b/CPP/7zip/UI/Common/HashCalc.cpp index c28c056..3fdf8e6 100644 --- a/CPP/7zip/UI/Common/HashCalc.cpp +++ b/CPP/7zip/UI/Common/HashCalc.cpp @@ -62,7 +62,7 @@ HRESULT CHashBundle::SetMethods(DECL_EXTERNAL_CODECS_LOC_VARS const UStringVecto if (m.MethodName.IsEmpty()) m.MethodName = k_DefaultHashMethod; - if (m.MethodName == "*") + if (m.MethodName.IsEqualTo("*")) { CRecordVector tempMethods; GetHashMethods(EXTERNAL_CODECS_LOC_VARS tempMethods); @@ -431,6 +431,19 @@ static void WriteLine(CDynLimBuf &hashFileString, } +static void Convert_TagName_to_MethodName(AString &method) +{ + // we need to convert at least SHA512/256 to SHA512-256, and SHA512/224 to SHA512-224 + // but we convert any '/' to '-'. + method.Replace('/', '-'); +} + +static void Convert_MethodName_to_TagName(AString &method) +{ + if (method.IsPrefixedBy_Ascii_NoCase("SHA512-2")) + method.ReplaceOneCharAtPos(6, '/'); +} + static void WriteLine(CDynLimBuf &hashFileString, const CHashOptionsLocal &options, @@ -440,8 +453,10 @@ static void WriteLine(CDynLimBuf &hashFileString, { AString methodName; if (!hb.Hashers.IsEmpty()) + { methodName = hb.Hashers[0].Name; - + Convert_MethodName_to_TagName(methodName); + } AString hashesString; AddHashResultLine(hashesString, hb.Hashers); WriteLine(hashFileString, options, path, isDir, methodName, hashesString); @@ -752,7 +767,7 @@ bool CHashPair::ParseCksum(const char *s) Name = end; Hash.Alloc(4); - SetBe32(Hash, crc) + SetBe32a(Hash, crc) Size_from_Arc = size; Size_from_Arc_Defined = true; @@ -773,56 +788,87 @@ static const char * const k_CsumMethodNames[] = { "sha256" , "sha224" -// , "sha512-224" -// , "sha512-256" + , "sha512-224" + , "sha512-256" , "sha384" , "sha512" -// , "sha3-224" + , "sha3-224" , "sha3-256" -// , "sha3-384" -// , "sha3-512" + , "sha3-384" + , "sha3-512" // , "shake128" // , "shake256" , "sha1" + , "sha2" + , "sha3" + , "sha" , "md5" - , "blake2sp" + , "blake2s" , "blake2b" + , "blake2sp" , "xxh64" - , "crc64" , "crc32" + , "crc64" , 
"cksum" }; -static UString GetMethod_from_FileName(const UString &name) + +// returns true, if (method) is known hash method or hash method group name. +static bool GetMethod_from_FileName(const UString &name, AString &method) { + method.Empty(); AString s; ConvertUnicodeToUTF8(name, s); const int dotPos = s.ReverseFind_Dot(); - const char *src = s.Ptr(); - bool isExtension = false; if (dotPos >= 0) { - isExtension = true; - src = s.Ptr(dotPos + 1); + method = s.Ptr(dotPos + 1); + if (method.IsEqualTo_Ascii_NoCase("txt") || + method.IsEqualTo_Ascii_NoCase("asc")) + { + method.Empty(); + const int dotPos2 = s.Find('.'); + if (dotPos2 >= 0) + s.DeleteFrom(dotPos2); + } } - const char *m = ""; + if (method.IsEmpty()) + { + // we support file names with "sum" and "sums" postfixes: "sha256sum", "sha256sums" + unsigned size; + if (s.Len() > 4 && StringsAreEqualNoCase_Ascii(s.RightPtr(4), "sums")) + size = 4; + else if (s.Len() > 3 && StringsAreEqualNoCase_Ascii(s.RightPtr(3), "sum")) + size = 3; + else + return false; + method = s; + method.DeleteFrom(s.Len() - size); + } + unsigned i; for (i = 0; i < Z7_ARRAY_SIZE(k_CsumMethodNames); i++) { - m = k_CsumMethodNames[i]; - if (isExtension) + const char *m = k_CsumMethodNames[i]; + if (method.IsEqualTo_Ascii_NoCase(m)) { - if (StringsAreEqual_Ascii(src, m)) - break; + // method = m; // we can get lowcase + return true; } - else if (IsString1PrefixedByString2_NoCase_Ascii(src, m)) - if (StringsAreEqual_Ascii(src + strlen(m), "sums")) - break; } - UString res; - if (i != Z7_ARRAY_SIZE(k_CsumMethodNames)) - res = m; - return res; + +/* + for (i = 0; i < Z7_ARRAY_SIZE(k_CsumMethodNames); i++) + { + const char *m = k_CsumMethodNames[i]; + if (method.IsPrefixedBy_Ascii_NoCase(m)) + { + method = m; // we get lowcase + return true; + } + } +*/ + return false; } @@ -1047,7 +1093,7 @@ Z7_COM7F_IMF(CHandler::GetRawProp(UInt32 index, PROPID propID, const void **data if (propID == kpidChecksum) { const CHashPair &hp = HashPairs[index]; 
- if (hp.Hash.Size() > 0) + if (hp.Hash.Size() != 0) { *data = hp.Hash; *dataSize = (UInt32)hp.Hash.Size(); @@ -1100,11 +1146,6 @@ Z7_COM7F_IMF(CHandler::GetArchiveProperty(PROPID propID, PROPVARIANT *value)) s.Add_UInt32(_hashSize * 8); s += "-bit"; } - if (!_nameExtenstion.IsEmpty()) - { - s.Add_Space_if_NotEmpty(); - s += _nameExtenstion; - } if (_is_PgpMethod) { Add_OptSpace_String(s, "PGP"); @@ -1120,6 +1161,18 @@ Z7_COM7F_IMF(CHandler::GetArchiveProperty(PROPID propID, PROPVARIANT *value)) Add_OptSpace_String(s, "TAG"); if (_are_there_Dirs) Add_OptSpace_String(s, "DIRS"); + if (!_method_from_FileName.IsEmpty()) + { + Add_OptSpace_String(s, "filename_method:"); + s += _method_from_FileName; + if (!_is_KnownMethod_in_FileName) + s += ":UNKNOWN"; + } + if (!_methods.IsEmpty()) + { + Add_OptSpace_String(s, "cmd_method:"); + s += _methods[0]; + } prop = s; break; } @@ -1228,6 +1281,15 @@ static HRESULT ReadStream_to_Buf(IInStream *stream, CByteBuffer &buf, IArchiveOp } +static bool isThere_Zero_Byte(const Byte *data, size_t size) +{ + for (size_t i = 0; i < size; i++) + if (data[i] == 0) + return true; + return false; +} + + Z7_COM7F_IMF(CHandler::Open(IInStream *stream, const UInt64 *, IArchiveOpenCallback *openCallback)) { COM_TRY_BEGIN @@ -1239,17 +1301,9 @@ Z7_COM7F_IMF(CHandler::Open(IInStream *stream, const UInt64 *, IArchiveOpenCallb CObjectVector &pairs = HashPairs; - bool zeroMode = false; - bool cr_lf_Mode = false; - { - for (size_t i = 0; i < buf.Size(); i++) - if (buf.ConstData()[i] == 0) - { - zeroMode = true; - break; - } - } + const bool zeroMode = isThere_Zero_Byte(buf, buf.Size()); _is_ZeroMode = zeroMode; + bool cr_lf_Mode = false; if (!zeroMode) cr_lf_Mode = Is_CR_LF_Data(buf, buf.Size()); @@ -1263,13 +1317,21 @@ Z7_COM7F_IMF(CHandler::Open(IInStream *stream, const UInt64 *, IArchiveOpenCallb NCOM::CPropVariant prop; RINOK(openVolumeCallback->GetProperty(kpidName, &prop)) if (prop.vt == VT_BSTR) - _nameExtenstion = 
GetMethod_from_FileName(prop.bstrVal); + _is_KnownMethod_in_FileName = GetMethod_from_FileName(prop.bstrVal, _method_from_FileName); } } - bool cksumMode = false; - if (_nameExtenstion.IsEqualTo_Ascii_NoCase("cksum")) - cksumMode = true; + if (!_methods.IsEmpty()) + { + ConvertUnicodeToUTF8(_methods[0], _method_for_Extraction); + } + if (_method_for_Extraction.IsEmpty()) + { + // if (_is_KnownMethod_in_FileName) + _method_for_Extraction = _method_from_FileName; + } + + const bool cksumMode = _method_for_Extraction.IsEqualTo_Ascii_NoCase("cksum"); _is_CksumMode = cksumMode; size_t pos = 0; @@ -1366,6 +1428,7 @@ void CHandler::ClearVars() _is_ZeroMode = false; _are_there_Tags = false; _are_there_Dirs = false; + _is_KnownMethod_in_FileName = false; _hashSize_Defined = false; _hashSize = 0; } @@ -1374,7 +1437,8 @@ void CHandler::ClearVars() Z7_COM7F_IMF(CHandler::Close()) { ClearVars(); - _nameExtenstion.Empty(); + _method_from_FileName.Empty(); + _method_for_Extraction.Empty(); _pgpMethod.Empty(); HashPairs.Clear(); return S_OK; @@ -1401,19 +1465,73 @@ static bool CheckDigests(const Byte *a, const Byte *b, size_t size) } -static void AddDefaultMethod(UStringVector &methods, unsigned size) +static void AddDefaultMethod(UStringVector &methods, + const char *name, unsigned size) { + int shaVersion = -1; + if (name) + { + if (StringsAreEqualNoCase_Ascii(name, "sha")) + { + shaVersion = 0; + if (size == 0) + size = 32; + } + else if (StringsAreEqualNoCase_Ascii(name, "sha1")) + { + shaVersion = 1; + if (size == 0) + size = 20; + } + else if (StringsAreEqualNoCase_Ascii(name, "sha2")) + { + shaVersion = 2; + if (size == 0) + size = 32; + } + else if (StringsAreEqualNoCase_Ascii(name, "sha3")) + { + if (size == 0 || + size == 32) name = "sha3-256"; + else if (size == 28) name = "sha3-224"; + else if (size == 48) name = "sha3-384"; + else if (size == 64) name = "sha3-512"; + } + else if (StringsAreEqualNoCase_Ascii(name, "sha512")) + { + // we allow any sha512 derived hash 
inside .sha512 file: + if (size == 48) name = "sha384"; + else if (size == 32) name = "sha512-256"; + else if (size == 28) name = "sha512-224"; + } + if (shaVersion >= 0) + name = NULL; + } + const char *m = NULL; - if (size == 32) m = "sha256"; - else if (size == 20) m = "sha1"; - else if (size == 16) m = "md5"; - else if (size == 8) m = "crc64"; - else if (size == 4) m = "crc32"; + if (name) + m = name; else + { + if (size == 64) m = "sha512"; + else if (size == 48) m = "sha384"; + else if (size == 32) m = "sha256"; + else if (size == 28) m = "sha224"; + else if (size == 20) m = "sha1"; + else if (shaVersion < 0) + { + if (size == 16) m = "md5"; + else if (size == 8) m = "crc64"; + else if (size == 4) m = "crc32"; + } + } + + if (!m) return; - #ifdef Z7_EXTERNAL_CODECS + +#ifdef Z7_EXTERNAL_CODECS const CExternalCodecs *_externalCodecs = g_ExternalCodecs_Ptr; - #endif +#endif CMethodId id; if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS AString(m), id)) @@ -1444,15 +1562,15 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems, CHashBundle hb_Glob; // UStringVector methods = options.Methods; UStringVector methods; - - if (methods.IsEmpty() && !_nameExtenstion.IsEmpty()) + +/* + if (methods.IsEmpty() && !utf_nameExtenstion.IsEmpty() && !_hashSize_Defined) { - AString utf; - ConvertUnicodeToUTF8(_nameExtenstion, utf); CMethodId id; - if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS utf, id)) + if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS utf_nameExtenstion, id)) methods.Add(_nameExtenstion); } +*/ if (methods.IsEmpty() && !_pgpMethod.IsEmpty()) { @@ -1461,12 +1579,21 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems, methods.Add(UString(_pgpMethod)); } +/* if (methods.IsEmpty() && _pgpMethod.IsEmpty() && _hashSize_Defined) - AddDefaultMethod(methods, _hashSize); + { + AddDefaultMethod(methods, + utf_nameExtenstion.IsEmpty() ? 
NULL : utf_nameExtenstion.Ptr(), + _hashSize); + } +*/ - RINOK(hb_Glob.SetMethods( + if (!methods.IsEmpty()) + { + RINOK(hb_Glob.SetMethods( EXTERNAL_CODECS_LOC_VARS methods)) + } Z7_DECL_CMyComPtr_QI_FROM( IArchiveUpdateCallbackFile, @@ -1561,9 +1688,11 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems, { hb_Use = &hb_Loc; CMethodId id; - if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS hp.Method, id)) + AString methodName = hp.Method; + Convert_TagName_to_MethodName(methodName); + if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, id)) { - methods_loc.Add(UString(hp.Method)); + methods_loc.Add(UString(methodName)); RINOK(hb_Loc.SetMethods( EXTERNAL_CODECS_LOC_VARS methods_loc)) @@ -1573,7 +1702,10 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems, } else if (methods.IsEmpty()) { - AddDefaultMethod(methods_loc, (unsigned)hp.Hash.Size()); + AddDefaultMethod(methods_loc, + _method_for_Extraction.IsEmpty() ? NULL : + _method_for_Extraction.Ptr(), + (unsigned)hp.Hash.Size()); if (!methods_loc.IsEmpty()) { hb_Use = &hb_Loc; @@ -1621,7 +1753,7 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems, Int32 opRes = NArchive::NExtract::NOperationResult::kUnsupportedMethod; if (isSupportedMode && res_SetMethods != E_NOTIMPL - && hb_Use->Hashers.Size() > 0 + && !hb_Use->Hashers.IsEmpty() ) { const CHasherState &hs = hb_Use->Hashers[0]; @@ -1774,10 +1906,6 @@ Z7_COM7F_IMF(CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt methods.Add(_methods[k]); } } - else if (_crcSize_WasSet) - { - AddDefaultMethod(methods, _crcSize); - } else { Z7_DECL_CMyComPtr_QI_FROM( @@ -1789,12 +1917,23 @@ Z7_COM7F_IMF(CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt RINOK(getRootProps->GetRootProp(kpidArcFileName, &prop)) if (prop.vt == VT_BSTR) { - const UString method = GetMethod_from_FileName(prop.bstrVal); + AString method; + /* const bool isKnownMethod = */ GetMethod_from_FileName(prop.bstrVal, 
method); if (!method.IsEmpty()) - methods.Add(method); + { + AddDefaultMethod(methods, method, _crcSize_WasSet ? _crcSize : 0); + if (methods.IsEmpty()) + return E_NOTIMPL; + } } } } + if (methods.IsEmpty() && _crcSize_WasSet) + { + AddDefaultMethod(methods, + NULL, // name + _crcSize); + } RINOK(hb.SetMethods(EXTERNAL_CODECS_LOC_VARS methods)) @@ -2038,6 +2177,15 @@ HRESULT CHandler::SetProperty(const wchar_t *nameSpec, const PROPVARIANT &value) } +void CHandler::InitProps() +{ + _supportWindowsBackslash = true; + _crcSize_WasSet = false; + _crcSize = 4; + _methods.Clear(); + _options.Init_HashOptionsLocal(); +} + Z7_COM7F_IMF(CHandler::SetProperties(const wchar_t * const *names, const PROPVARIANT *values, UInt32 numProps)) { COM_TRY_BEGIN @@ -2088,22 +2236,27 @@ void Codecs_AddHashArcHandler(CCodecs *codecs) " sha512" " sha384" " sha224" - // " sha512-224" - // " sha512-256" - // " sha3-224" + " sha512-224" + " sha512-256" + " sha3-224" " sha3-256" - // " sha3-384" - // " sha3-512" + " sha3-384" + " sha3-512" // " shake128" // " shake256" " sha1" + " sha2" + " sha3" " sha" " md5" + " blake2s" + " blake2b" " blake2sp" " xxh64" - " crc32 crc64" - " asc" + " crc32" + " crc64" " cksum" + " asc" // " b2sum" ), UString()); diff --git a/CPP/7zip/UI/Common/HashCalc.h b/CPP/7zip/UI/Common/HashCalc.h index 8867188..a1e3191 100644 --- a/CPP/7zip/UI/Common/HashCalc.h +++ b/CPP/7zip/UI/Common/HashCalc.h @@ -279,32 +279,25 @@ Z7_CLASS_IMP_CHandler_IInArchive_3( bool _isArc; bool _supportWindowsBackslash; bool _crcSize_WasSet; - UInt64 _phySize; - CObjectVector HashPairs; - UString _nameExtenstion; - // UString _method_fromName; - AString _pgpMethod; bool _is_CksumMode; bool _is_PgpMethod; bool _is_ZeroMode; bool _are_there_Tags; bool _are_there_Dirs; + bool _is_KnownMethod_in_FileName; bool _hashSize_Defined; unsigned _hashSize; UInt32 _crcSize; + UInt64 _phySize; + CObjectVector HashPairs; UStringVector _methods; + AString _method_from_FileName; + AString _pgpMethod; + 
AString _method_for_Extraction; CHashOptionsLocal _options; void ClearVars(); - - void InitProps() - { - _supportWindowsBackslash = true; - _crcSize_WasSet = false; - _crcSize = 4; - _methods.Clear(); - _options.Init_HashOptionsLocal(); - } + void InitProps(); bool CanUpdate() const { diff --git a/CPP/7zip/UI/Common/LoadCodecs.cpp b/CPP/7zip/UI/Common/LoadCodecs.cpp index 999d3ec..bd5fb06 100644 --- a/CPP/7zip/UI/Common/LoadCodecs.cpp +++ b/CPP/7zip/UI/Common/LoadCodecs.cpp @@ -170,7 +170,7 @@ void CArcInfoEx::AddExts(const UString &ext, const UString &addExt) if (i < addExts.Size()) { extInfo.AddExt = addExts[i]; - if (extInfo.AddExt == L"*") + if (extInfo.AddExt.IsEqualTo("*")) extInfo.AddExt.Empty(); } Exts.Add(extInfo); @@ -931,8 +931,8 @@ bool CCodecs::FindFormatForArchiveType(const UString &arcType, CIntVector &forma const UString name = arcType.Mid(pos, (unsigned)pos2 - pos); if (name.IsEmpty()) return false; - int index = FindFormatForArchiveType(name); - if (index < 0 && name != L"*") + const int index = FindFormatForArchiveType(name); + if (index < 0 && !name.IsEqualTo("*")) { formatIndices.Clear(); return false; diff --git a/CPP/7zip/UI/Common/Update.cpp b/CPP/7zip/UI/Common/Update.cpp index d404de0..5db152c 100644 --- a/CPP/7zip/UI/Common/Update.cpp +++ b/CPP/7zip/UI/Common/Update.cpp @@ -474,7 +474,7 @@ static HRESULT Compress( CArcToDoStat stat2; - if (options.RenamePairs.Size() != 0) + if (options.RenameMode || options.RenamePairs.Size() != 0) { FOR_VECTOR (i, arcItems) { @@ -1920,7 +1920,7 @@ Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION if (NFind::DoesDirExist(phyPath)) { RINOK(callback->DeletingAfterArchiving(phyPath, true)) - RemoveDir(phyPath); + RemoveDirAlways_if_Empty(phyPath); } } diff --git a/CPP/7zip/UI/Common/Update.h b/CPP/7zip/UI/Common/Update.h index bc8398b..5736dd5 100644 --- a/CPP/7zip/UI/Common/Update.h +++ b/CPP/7zip/UI/Common/Update.h @@ -94,6 +94,7 @@ struct CUpdateOptions bool DeleteAfterCompressing; bool SetArcMTime; + bool RenameMode; 
CBoolPair NtSecurity; CBoolPair AltStreams; @@ -139,6 +140,7 @@ struct CUpdateOptions DeleteAfterCompressing(false), SetArcMTime(false), + RenameMode(false), ArcNameMode(k_ArcNameMode_Smart), PathMode(NWildcard::k_RelatPath) diff --git a/CPP/7zip/UI/Common/UpdateCallback.cpp b/CPP/7zip/UI/Common/UpdateCallback.cpp index 0313756..1e14912 100644 --- a/CPP/7zip/UI/Common/UpdateCallback.cpp +++ b/CPP/7zip/UI/Common/UpdateCallback.cpp @@ -32,6 +32,7 @@ #include "../../../Windows/PropVariant.h" #include "../../Common/StreamObjects.h" +#include "../../Archive/Common/ItemNameUtils.h" #include "UpdateCallback.h" @@ -306,7 +307,7 @@ Z7_COM7F_IMF(CArchiveUpdateCallback::GetRawProp(UInt32 index, PROPID propID, con #if defined(_WIN32) && !defined(UNDER_CE) -static UString GetRelativePath(const UString &to, const UString &from) +static UString GetRelativePath(const UString &to, const UString &from, bool isWSL) { UStringVector partsTo, partsFrom; SplitPathToParts(to, partsTo); @@ -324,11 +325,12 @@ static UString GetRelativePath(const UString &to, const UString &from) if (i == 0) { - #ifdef _WIN32 - if (NName::IsDrivePath(to) || - NName::IsDrivePath(from)) +#ifdef _WIN32 + if (isWSL || + (NName::IsDrivePath(to) || + NName::IsDrivePath(from))) return to; - #endif +#endif } UString s; @@ -373,54 +375,87 @@ Z7_COM7F_IMF(CArchiveUpdateCallback::GetProperty(UInt32 index, PROPID propID, PR return S_OK; } - #if !defined(UNDER_CE) - +#if !defined(UNDER_CE) if (up.DirIndex >= 0) { const CDirItem &di = DirItems->Items[(unsigned)up.DirIndex]; - - #ifdef _WIN32 - // if (di.IsDir()) + if (di.ReparseData.Size()) { +#ifdef _WIN32 CReparseAttr attr; if (attr.Parse(di.ReparseData, di.ReparseData.Size())) { - const UString simpleName = attr.GetPath(); - if (!attr.IsSymLink_WSL() && attr.IsRelative_Win()) - prop = simpleName; - else + UString path = attr.GetPath(); + if (!path.IsEmpty()) { - const FString phyPath = DirItems->GetPhyPath((unsigned)up.DirIndex); - FString fullPath; - if 
(NDir::MyGetFullPathName(phyPath, fullPath)) + bool isWSL = attr.IsSymLink_WSL(); + if (isWSL) + NArchive::NItemName::ReplaceToWinSlashes(path, true); // useBackslashReplacement + // it's expected that (path) now uses windows slashes. + // CReparseAttr::IsRelative_Win() returns true if FLAG_RELATIVE is set + // CReparseAttr::IsRelative_Win() returns true for "\dir1\path" + // but we want to store real relative paths without "\" root prefix. + // so we parse path instead of IsRelative_Win() calling. + if (// attr.IsRelative_Win() || + (isWSL ? + IS_PATH_SEPAR(path[0]) : + NName::IsAbsolutePath(path))) { - prop = GetRelativePath(simpleName, fs2us(fullPath)); + // (path) is abolute path or relative to root: "\path" + // we try to convert (path) to relative path for writing to archive. + const FString phyPath = DirItems->GetPhyPath((unsigned)up.DirIndex); + FString fullPath; + if (NDir::MyGetFullPathName(phyPath, fullPath)) + { + if (IS_PATH_SEPAR(path[0]) && + !IS_PATH_SEPAR(path[1])) + { + // path is relative to root of (fullPath): "\path" + const unsigned prefixSize = NName::GetRootPrefixSize(fullPath); + if (prefixSize) + { + path.DeleteFrontal(1); + path.Insert(0, fs2us(fullPath.Left(prefixSize))); + // we have changed "\" prefix to drive prefix "c:\" in (path). + // (path) is Windows path now. + isWSL = false; + } + } + } + path = GetRelativePath(path, fs2us(fullPath), isWSL); } +#if WCHAR_PATH_SEPARATOR != L'/' + // 7-Zip's TAR handler in Windows replaces windows slashes to linux slashes. + // so we can return any slashes to TAR handler. + // or we can convert to linux slashes here, + // because input IInArchive handler uses linux slashes for kpidSymLink. + // path.Replace(WCHAR_PATH_SEPARATOR, L'/'); +#endif + if (!path.IsEmpty()) + prop = path; } - prop.Detach(value); - return S_OK; } - } - - #else // _WIN32 - - if (di.ReparseData.Size() != 0) - { +#else // ! 
_WIN32 AString utf; utf.SetFrom_CalcLen((const char *)(const Byte *)di.ReparseData, (unsigned)di.ReparseData.Size()); - + #if 0 // 0 - for debug + // it's expected that link data uses system codepage. + // fs2us() ignores conversion errors. But we want correct path + UString us (fs2us(utf)); + #else UString us; if (ConvertUTF8ToUnicode(utf, us)) + #endif { - prop = us; - prop.Detach(value); - return S_OK; + if (!us.IsEmpty()) + prop = us; } +#endif // ! _WIN32 } - - #endif // _WIN32 + prop.Detach(value); + return S_OK; } - #endif // !defined(UNDER_CE) +#endif // !defined(UNDER_CE) } else if (propID == kpidHardLink) { @@ -428,7 +463,12 @@ Z7_COM7F_IMF(CArchiveUpdateCallback::GetProperty(UInt32 index, PROPID propID, PR { const CKeyKeyValPair &pair = _map[_hardIndex_To]; const CUpdatePair2 &up2 = (*UpdatePairs)[pair.Value]; - prop = DirItems->GetLogPath((unsigned)up2.DirIndex); + const UString path = DirItems->GetLogPath((unsigned)up2.DirIndex); +#if WCHAR_PATH_SEPARATOR != L'/' + // 7-Zip's TAR handler in Windows replaces windows slashes to linux slashes. 
+ // path.Replace(WCHAR_PATH_SEPARATOR, L'/'); +#endif + prop = path; prop.Detach(value); return S_OK; } @@ -438,7 +478,7 @@ Z7_COM7F_IMF(CArchiveUpdateCallback::GetProperty(UInt32 index, PROPID propID, PR return S_OK; } } - } + } // if (up.NewData) if (up.IsAnti && propID != kpidIsDir diff --git a/CPP/7zip/UI/Console/Main.cpp b/CPP/7zip/UI/Console/Main.cpp index 4fa5c35..5094452 100644 --- a/CPP/7zip/UI/Console/Main.cpp +++ b/CPP/7zip/UI/Console/Main.cpp @@ -908,9 +908,12 @@ int Main2( if (options.EnableHeaders) { - ShowCopyrightAndHelp(g_StdStream, false); - if (!parser.Parse1Log.IsEmpty()) - *g_StdStream << parser.Parse1Log; + if (g_StdStream) + { + ShowCopyrightAndHelp(g_StdStream, false); + if (!parser.Parse1Log.IsEmpty()) + *g_StdStream << parser.Parse1Log; + } } parser.Parse2(options); diff --git a/CPP/7zip/UI/Console/makefile b/CPP/7zip/UI/Console/makefile index d292726..38b9004 100644 --- a/CPP/7zip/UI/Console/makefile +++ b/CPP/7zip/UI/Console/makefile @@ -59,10 +59,10 @@ COMPRESS_OBJS = \ C_OBJS = $(C_OBJS) \ $O\Alloc.obj \ $O\CpuArch.obj \ - $O\Sort.obj \ $O\Threads.obj \ !include "../../Crc.mak" +!include "../../Sort.mak" !include "Console.mak" !include "../../7zip.mak" diff --git a/CPP/Build.mak b/CPP/Build.mak index 81992d4..6a400af 100644 --- a/CPP/Build.mak +++ b/CPP/Build.mak @@ -111,7 +111,13 @@ CFLAGS = $(CFLAGS) -Zc:forScope !IFNDEF UNDER_CE !IF "$(CC)" != "clang-cl" -CFLAGS = $(CFLAGS) -MP4 +MP_NPROC = 16 +!IFDEF NUMBER_OF_PROCESSORS +!IF $(NUMBER_OF_PROCESSORS) < $(MP_NPROC) +MP_NPROC = $(NUMBER_OF_PROCESSORS) +!ENDIF +!ENDIF +CFLAGS = $(CFLAGS) -MP$(MP_NPROC) !ENDIF !IFNDEF PLATFORM # CFLAGS = $(CFLAGS) -arch:IA32 diff --git a/CPP/Common/MyString.cpp b/CPP/Common/MyString.cpp index e10472a..dafe843 100644 --- a/CPP/Common/MyString.cpp +++ b/CPP/Common/MyString.cpp @@ -208,35 +208,6 @@ bool StringsAreEqualNoCase(const wchar_t *s1, const wchar_t *s2) throw() // ---------- ASCII ---------- -bool AString::IsPrefixedBy_Ascii_NoCase(const char *s) 
const throw() -{ - const char *s1 = _chars; - for (;;) - { - const char c2 = *s++; - if (c2 == 0) - return true; - const char c1 = *s1++; - if (MyCharLower_Ascii(c1) != - MyCharLower_Ascii(c2)) - return false; - } -} - -bool UString::IsPrefixedBy_Ascii_NoCase(const char *s) const throw() -{ - const wchar_t *s1 = _chars; - for (;;) - { - const char c2 = *s++; - if (c2 == 0) - return true; - const wchar_t c1 = *s1++; - if (MyCharLower_Ascii(c1) != (unsigned char)MyCharLower_Ascii(c2)) - return false; - } -} - bool StringsAreEqual_Ascii(const char *u, const char *a) throw() { for (;;) diff --git a/CPP/Common/MyString.h b/CPP/Common/MyString.h index 5f41c7b..dfbaf0b 100644 --- a/CPP/Common/MyString.h +++ b/CPP/Common/MyString.h @@ -429,11 +429,11 @@ public: // int CompareNoCase(const char *s) const { return MyStringCompareNoCase(_chars, s); } // int CompareNoCase(const AString &s) const { return MyStringCompareNoCase(_chars, s._chars); } bool IsPrefixedBy(const char *s) const { return IsString1PrefixedByString2(_chars, s); } - bool IsPrefixedBy_Ascii_NoCase(const char *s) const throw(); + bool IsPrefixedBy_Ascii_NoCase(const char *s) const { return IsString1PrefixedByString2_NoCase_Ascii(_chars, s); } bool IsAscii() const { - unsigned len = Len(); + const unsigned len = Len(); const char *s = _chars; for (unsigned i = 0; i < len; i++) if ((unsigned char)s[i] >= 0x80) @@ -727,22 +727,23 @@ public: // int CompareNoCase(const wchar_t *s) const { return MyStringCompareNoCase(_chars, s); } // int CompareNoCase(const UString &s) const { return MyStringCompareNoCase(_chars, s._chars); } bool IsPrefixedBy(const wchar_t *s) const { return IsString1PrefixedByString2(_chars, s); } + bool IsPrefixedBy(const char *s) const { return IsString1PrefixedByString2(_chars, s); } bool IsPrefixedBy_NoCase(const wchar_t *s) const { return IsString1PrefixedByString2_NoCase(_chars, s); } - bool IsPrefixedBy_Ascii_NoCase(const char *s) const throw(); + bool IsPrefixedBy_Ascii_NoCase(const char 
*s) const { return IsString1PrefixedByString2_NoCase_Ascii(_chars, s); } bool IsAscii() const { - unsigned len = Len(); + const unsigned len = Len(); const wchar_t *s = _chars; for (unsigned i = 0; i < len; i++) - if (s[i] >= 0x80) + if ((unsigned)(int)s[i] >= 0x80) return false; return true; } int Find(wchar_t c) const { return FindCharPosInString(_chars, c); } int Find(wchar_t c, unsigned startIndex) const { - int pos = FindCharPosInString(_chars + startIndex, c); + const int pos = FindCharPosInString(_chars + startIndex, c); return pos < 0 ? -1 : (int)startIndex + pos; } diff --git a/CPP/Common/Wildcard.cpp b/CPP/Common/Wildcard.cpp index dc757c1..413da3b 100644 --- a/CPP/Common/Wildcard.cpp +++ b/CPP/Common/Wildcard.cpp @@ -255,7 +255,8 @@ ForDir nonrec [0, M) same as ForBoth-File bool CItem::AreAllAllowed() const { - return ForFile && ForDir && WildcardMatching && PathParts.Size() == 1 && PathParts.Front() == L"*"; + return ForFile && ForDir && WildcardMatching + && PathParts.Size() == 1 && PathParts.Front().IsEqualTo("*"); } bool CItem::CheckPath(const UStringVector &pathParts, bool isFile) const @@ -542,7 +543,7 @@ unsigned GetNumPrefixParts_if_DrivePath(UStringVector &pathParts) { if (pathParts.Size() < 4 || !pathParts[1].IsEmpty() - || pathParts[2] != L"?") + || !pathParts[2].IsEqualTo("?")) return 0; testIndex = 3; } @@ -574,11 +575,11 @@ static unsigned GetNumPrefixParts(const UStringVector &pathParts) return 1; if (pathParts.Size() == 2) return 2; - if (pathParts[2] == L".") + if (pathParts[2].IsEqualTo(".")) return 3; unsigned networkParts = 2; - if (pathParts[2] == L"?") + if (pathParts[2].IsEqualTo("?")) { if (pathParts.Size() == 3) return 3; @@ -642,7 +643,7 @@ void CCensor::AddItem(ECensorPathMode pathMode, bool include, const UString &pat if (pathParts.Size() >= 3 && pathParts[0].IsEmpty() && pathParts[1].IsEmpty() - && pathParts[2] == L"?") + && pathParts[2].IsEqualTo("?")) ignoreWildcardIndex = 2; // #endif @@ -665,7 +666,7 @@ void 
CCensor::AddItem(ECensorPathMode pathMode, bool include, const UString &pat for (unsigned i = numPrefixParts; i < pathParts.Size(); i++) { const UString &part = pathParts[i]; - if (part == L".." || part == L".") + if (part.IsEqualTo("..") || part.IsEqualTo(".")) dotsIndex = (int)i; } diff --git a/CPP/Windows/FileDir.cpp b/CPP/Windows/FileDir.cpp index 5063568..4a4bf52 100644 --- a/CPP/Windows/FileDir.cpp +++ b/CPP/Windows/FileDir.cpp @@ -124,7 +124,7 @@ bool GetSystemDir(FString &path) #endif // UNDER_CE -bool SetDirTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime) +static bool SetFileTime_Base(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime, DWORD dwFlagsAndAttributes) { #ifndef _UNICODE if (!g_IsNT) @@ -137,14 +137,14 @@ bool SetDirTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CF HANDLE hDir = INVALID_HANDLE_VALUE; IF_USE_MAIN_PATH hDir = ::CreateFileW(fs2us(path), GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, - NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); + NULL, OPEN_EXISTING, dwFlagsAndAttributes, NULL); #ifdef Z7_LONG_PATH if (hDir == INVALID_HANDLE_VALUE && USE_SUPER_PATH) { UString superPath; if (GetSuperPath(path, superPath, USE_MAIN_PATH)) hDir = ::CreateFileW(superPath, GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, - NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); + NULL, OPEN_EXISTING, dwFlagsAndAttributes, NULL); } #endif @@ -157,6 +157,15 @@ bool SetDirTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CF return res; } +bool SetDirTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime) +{ + return SetFileTime_Base(path, cTime, aTime, mTime, FILE_FLAG_BACKUP_SEMANTICS); +} + +bool SetLinkFileTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime) +{ + return SetFileTime_Base(path, cTime, aTime, mTime, FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OPEN_REPARSE_POINT); +} bool 
SetFileAttrib(CFSTR path, DWORD attrib) @@ -651,6 +660,35 @@ bool RemoveDirWithSubItems(const FString &path) return RemoveDir(path); } +bool RemoveDirAlways_if_Empty(const FString &path) +{ + const DWORD attrib = NFind::GetFileAttrib(path); + if (attrib != INVALID_FILE_ATTRIBUTES + && (attrib & FILE_ATTRIBUTE_READONLY)) + { + bool need_ClearAttrib = true; + if ((attrib & FILE_ATTRIBUTE_REPARSE_POINT) == 0) + { + FString s (path); + s.Add_PathSepar(); + NFind::CEnumerator enumerator; + enumerator.SetDirPrefix(s); + NFind::CDirEntry fi; + if (enumerator.Next(fi)) + { + // we don't want to change attributes, if there are files + // in directory, because RemoveDir(path) will fail. + need_ClearAttrib = false; + // SetLastError(ERROR_DIR_NOT_EMPTY); + // return false; + } + } + if (need_ClearAttrib) + SetFileAttrib(path, 0); // we clear read-only attrib to remove read-only dir + } + return RemoveDir(path); +} + #endif // _WIN32 #ifdef UNDER_CE @@ -1144,17 +1182,15 @@ bool GetCurrentDir(FString &path) -bool SetDirTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime) +static bool SetFileTime_Base(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime, const int flags) { // need testing /* struct utimbuf buf; struct stat st; UNUSED_VAR(cTime) - printf("\nstat = %s\n", path); int ret = stat(path, &st); - if (ret == 0) { buf.actime = st.st_atime; @@ -1166,47 +1202,42 @@ bool SetDirTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CF buf.actime = cur_time; buf.modtime = cur_time; } - if (aTime) { UInt32 ut; if (NTime::FileTimeToUnixTime(*aTime, ut)) buf.actime = ut; } - if (mTime) { UInt32 ut; if (NTime::FileTimeToUnixTime(*mTime, ut)) buf.modtime = ut; } - return utime(path, &buf) == 0; */ // if (!aTime && !mTime) return true; - struct timespec times[2]; UNUSED_VAR(cTime) - bool needChange; needChange = FiTime_To_timespec(aTime, times[0]); needChange |= FiTime_To_timespec(mTime, times[1]); - - /* - if (mTime) 
- { - printf("\n time = %ld.%9ld\n", mTime->tv_sec, mTime->tv_nsec); - } - */ - + // if (mTime) { printf("\n time = %ld.%9ld\n", mTime->tv_sec, mTime->tv_nsec); } if (!needChange) return true; - const int flags = 0; // follow link - // = AT_SYMLINK_NOFOLLOW; // don't follow link return utimensat(AT_FDCWD, path, times, flags) == 0; } +bool SetDirTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime) +{ + return SetFileTime_Base(path, cTime, aTime, mTime, 0); // (flags = 0) means follow_link +} + +bool SetLinkFileTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime) +{ + return SetFileTime_Base(path, cTime, aTime, mTime, AT_SYMLINK_NOFOLLOW); +} struct C_umask diff --git a/CPP/Windows/FileDir.h b/CPP/Windows/FileDir.h index ef6a43a..eaed94d 100644 --- a/CPP/Windows/FileDir.h +++ b/CPP/Windows/FileDir.h @@ -18,9 +18,20 @@ bool GetSystemDir(FString &path); WIN32 API : SetFileTime() doesn't allow to set zero timestamps in file but linux : allows unix time = 0 in filesystem */ - +/* +SetDirTime() can be used to set time for file or for dir. +If path is symbolic link, SetDirTime() will follow symbolic link, +and it will set timestamps of symbolic link's target file or dir. +*/ bool SetDirTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime); +/* +SetLinkFileTime() doesn't follow symbolic link, +and it sets timestamps for symbolic link file itself. +If (path) is not symbolic link, it still can work (at least in some new OS versions). 
+*/ +bool SetLinkFileTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime); + #ifdef _WIN32 @@ -78,6 +89,11 @@ bool CreateComplexDir(CFSTR path); bool DeleteFileAlways(CFSTR name); bool RemoveDirWithSubItems(const FString &path); +#ifdef _WIN32 +bool RemoveDirAlways_if_Empty(const FString &path); +#else +#define RemoveDirAlways_if_Empty RemoveDir +#endif bool MyGetFullPathName(CFSTR path, FString &resFullPath); bool GetFullPathAndSplit(CFSTR path, FString &resDirPrefix, FString &resFileName); diff --git a/CPP/Windows/FileFind.cpp b/CPP/Windows/FileFind.cpp index 54e4590..f3e982c 100644 --- a/CPP/Windows/FileFind.cpp +++ b/CPP/Windows/FileFind.cpp @@ -731,7 +731,7 @@ bool CFileInfo::Find(CFSTR path, bool followLink) bool isOK = false; if (finder.FindFirst(s, *this)) { - if (Name == FTEXT(".")) + if (Name.IsEqualTo(".")) { Name = path + prefixSize; return true; @@ -769,6 +769,13 @@ bool CFileInfo::Find(CFSTR path, bool followLink) // return FollowReparse(path, IsDir()); return Fill_From_ByHandleFileInfo(path); +/* + // Fill_From_ByHandleFileInfo returns false (with Access Denied error), + // if there is reparse link file (not directory reparse item). + if (Fill_From_ByHandleFileInfo(path)) + return true; + return HasReparsePoint(); +*/ } bool CFileInfoBase::Fill_From_ByHandleFileInfo(CFSTR path) diff --git a/CPP/Windows/FileIO.h b/CPP/Windows/FileIO.h index af2b325..1bdc48e 100644 --- a/CPP/Windows/FileIO.h +++ b/CPP/Windows/FileIO.h @@ -11,8 +11,7 @@ #define Z7_WIN_SYMLINK_FLAG_RELATIVE 1 -// what the meaning of that FLAG or field (2)? -#define Z7_WIN_LX_SYMLINK_FLAG 2 +#define Z7_WIN_LX_SYMLINK_VERSION_2 2 #ifdef _WIN32 @@ -44,7 +43,33 @@ namespace NWindows { namespace NFile { #if defined(_WIN32) && !defined(UNDER_CE) -bool FillLinkData(CByteBuffer &dest, const wchar_t *path, bool isSymLink, bool isWSL); +/* + in: (CByteBuffer &dest) is empty + in: (path) uses Windows path separator (\). + out: (path) uses Linux path separator (/). 
+ if (convertDrivePath == true), then "c:\\" prefix is replaced by "/mnt/c/" prefix +*/ +void Convert_WinPath_to_WslLinuxPath(FString &path, bool convertDrivePath); +// (path) must use Linux path separator (/). +void FillLinkData_WslLink(CByteBuffer &dest, const wchar_t *path); + +/* + in: (CByteBuffer &dest) is empty + if (isSymLink == false) : MOUNT_POINT : (path) must be absolute. + if (isSymLink == true) : SYMLINK : Windows + (path) must use Windows path separator (\). + (path) must be without link "\\??\\" prefix. + link "\\??\\" prefix will be added inside FillLinkData(), if path is absolute. +*/ +void FillLinkData_WinLink(CByteBuffer &dest, const wchar_t *path, bool isSymLink); +// in: (CByteBuffer &dest) is empty +inline void FillLinkData(CByteBuffer &dest, const wchar_t *path, bool isSymLink, bool isWSL) +{ + if (isWSL) + FillLinkData_WslLink(dest, path); + else + FillLinkData_WinLink(dest, path, isSymLink); +} #endif struct CReparseShortInfo @@ -61,7 +86,6 @@ struct CReparseAttr UInt32 Flags; UString SubsName; UString PrintName; - AString WslName; bool HeaderError; @@ -71,8 +95,7 @@ struct CReparseAttr CReparseAttr(): Tag(0), Flags(0) {} - // Parse() - // returns (true) and (ErrorCode = 0), if (it'a correct known link) + // returns (true) and (ErrorCode = 0), if (it's correct known link) // returns (false) and (ErrorCode = ERROR_REPARSE_TAG_INVALID), if unknown tag bool Parse(const Byte *p, size_t size); @@ -80,18 +103,14 @@ struct CReparseAttr bool IsSymLink_Win() const { return Tag == Z7_WIN_IO_REPARSE_TAG_SYMLINK; } bool IsSymLink_WSL() const { return Tag == Z7_WIN_IO_REPARSE_TAG_LX_SYMLINK; } + // note: "/dir1/path" is marked as relative.
bool IsRelative_Win() const { return Flags == Z7_WIN_SYMLINK_FLAG_RELATIVE; } bool IsRelative_WSL() const { - if (WslName.IsEmpty()) - return true; - char c = WslName[0]; - return !IS_PATH_SEPAR(c); + return WslName[0] != '/'; // WSL uses unix path separator } - // bool IsVolume() const; - bool IsOkNamePair() const; UString GetPath() const; }; diff --git a/CPP/Windows/FileLink.cpp b/CPP/Windows/FileLink.cpp index 78961d5..a3c4f04 100644 --- a/CPP/Windows/FileLink.cpp +++ b/CPP/Windows/FileLink.cpp @@ -38,13 +38,25 @@ namespace NFile { using namespace NName; +/* +Win10 Junctions/SymLinks: + - (/) slash doesn't work as path separator + - Win10 preinstalled junctions don't use tail backslash, but tail backslashes also work. + - double backslash works only after drive prefix "c:\\dir1\dir2\", + and doesn't work in another places. + - absolute path without \??\ prefix doesn't work + - absolute path "c:" doesn't work +*/ + /* Reparse Points (Junctions and Symbolic Links): struct { UInt32 Tag; UInt16 Size; // not including starting 8 bytes - UInt16 Reserved; // = 0 + UInt16 Reserved; // = 0, DOCs: // Length, in bytes, of the unparsed portion of + // the file name pointed to by the FileName member of the associated file object. + // This member is only valid for create operations when the I/O fails with STATUS_REPARSE. UInt16 SubstituteOffset; // offset in bytes from start of namesChars UInt16 SubstituteLen; // size in bytes, it doesn't include tailed NUL @@ -68,6 +80,16 @@ using namespace NName; 2) Default Order in table: Print Path Substitute Path + +DOCS: + The print name SHOULD be an informative pathname, suitable for display + to a user, that also identifies the target of the mount point. + Neither of these pathnames can contain dot directory names. + +reparse tags, with the exception of IO_REPARSE_TAG_SYMLINK, +are processed on the server and are not processed by a client +after transmission over the wire. +Clients SHOULD treat associated reparse data as opaque data. 
*/ /* @@ -93,7 +115,8 @@ static const UInt32 kReparseFlags_Microsoft = ((UInt32)1 << 31); #define Get16(p) GetUi16(p) #define Get32(p) GetUi32(p) -static const wchar_t * const k_LinkPrefix = L"\\??\\"; +static const char * const k_LinkPrefix = "\\??\\"; +static const char * const k_LinkPrefix_UNC = "\\??\\UNC\\"; static const unsigned k_LinkPrefix_Size = 4; static bool IsLinkPrefix(const wchar_t *s) @@ -102,7 +125,7 @@ static bool IsLinkPrefix(const wchar_t *s) } /* -static const wchar_t * const k_VolumePrefix = L"Volume{"; +static const char * const k_VolumePrefix = "Volume{"; static const bool IsVolumeName(const wchar_t *s) { return IsString1PrefixedByString2(s, k_VolumePrefix); @@ -118,7 +141,7 @@ static void WriteString(Byte *dest, const wchar_t *path) { for (;;) { - wchar_t c = *path++; + const wchar_t c = *path++; if (c == 0) return; Set16(dest, (UInt16)c) @@ -126,62 +149,103 @@ static void WriteString(Byte *dest, const wchar_t *path) } } -bool FillLinkData(CByteBuffer &dest, const wchar_t *path, bool isSymLink, bool isWSL) +#ifdef _WIN32 +void Convert_WinPath_to_WslLinuxPath(FString &s, bool convertDrivePath) { - bool isAbs = IsAbsolutePath(path); - if (!isAbs && !isSymLink) - return false; - - if (isWSL) + if (convertDrivePath && IsDrivePath(s)) { - // unsupported characters probably use Replacement Character UTF-16 0xFFFD - AString utf; - ConvertUnicodeToUTF8(path, utf); - const size_t size = 4 + utf.Len(); - if (size != (UInt16)size) - return false; - dest.Alloc(8 + size); - Byte *p = dest; - Set32(p, Z7_WIN_IO_REPARSE_TAG_LX_SYMLINK) - Set16(p + 4, (UInt16)(size)) - Set16(p + 6, 0) - Set32(p + 8, Z7_WIN_LX_SYMLINK_FLAG) - memcpy(p + 12, utf.Ptr(), utf.Len()); - return true; + FChar c = s[0]; + c = MyCharLower_Ascii(c); + s.DeleteFrontal(2); + s.InsertAtFront(c); + s.Insert(0, FTEXT("/mnt/")); } + s.Replace(FCHAR_PATH_SEPARATOR, FTEXT('/')); +} +#endif - // usual symbolic LINK (NOT WSL) + +static const unsigned k_Link_Size_Limit = 1u << 16; // 16-bit 
field is used for size. + +void FillLinkData_WslLink(CByteBuffer &dest, const wchar_t *path) +{ + // dest.Free(); // it's empty already + // WSL probably uses Replacement Character UTF-16 0xFFFD for unsupported characters? + AString utf; + ConvertUnicodeToUTF8(path, utf); + const unsigned size = 4 + utf.Len(); + if (size >= k_Link_Size_Limit) + return; + dest.Alloc(8 + size); + Byte *p = dest; + Set32(p, Z7_WIN_IO_REPARSE_TAG_LX_SYMLINK) + // Set32(p + 4, (UInt32)size) + Set16(p + 4, (UInt16)size) + Set16(p + 6, 0) + Set32(p + 8, Z7_WIN_LX_SYMLINK_VERSION_2) + memcpy(p + 12, utf.Ptr(), utf.Len()); +} + + +void FillLinkData_WinLink(CByteBuffer &dest, const wchar_t *path, bool isSymLink) +{ + // dest.Free(); // it's empty already + bool isAbs = false; + if (IS_PATH_SEPAR(path[0])) + { + // root paths "\dir1\path" are marked as relative + if (IS_PATH_SEPAR(path[1])) + isAbs = true; + } + else + isAbs = IsAbsolutePath(path); + if (!isAbs && !isSymLink) + { + // Win10 allows us to create relative MOUNT_POINT. + // But relative MOUNT_POINT will not work when accessing it. + // So we prevent useless creation of a relative MOUNT_POINT. + return; + } bool needPrintName = true; - - if (IsSuperPath(path)) + UString subs (path); + if (isAbs) { - path += kSuperPathPrefixSize; - if (!IsDrivePath(path)) - needPrintName = false; + const bool isSuperPath = IsSuperPath(path); + if (!isSuperPath && NName::IsNetworkPath(us2fs(path))) + { + subs = k_LinkPrefix_UNC; + subs += (path + 2); + } + else + { + if (isSuperPath) + { + // we remove super prefix: + path += kSuperPathPrefixSize; + // we want to get correct absolute path in PrintName still. + if (!IsDrivePath(path)) + needPrintName = false; // we need "\\server\path" for print name. + } + subs = k_LinkPrefix; + subs += path; + } } - - const unsigned add_Prefix_Len = isAbs ?
k_LinkPrefix_Size : 0; - + const size_t len1 = subs.Len() * 2; size_t len2 = (size_t)MyStringLen(path) * 2; - const size_t len1 = len2 + add_Prefix_Len * 2; if (!needPrintName) len2 = 0; - - size_t totalNamesSize = (len1 + len2); - + size_t totalNamesSize = len1 + len2; /* some WIM imagex software uses old scheme for symbolic links. - so we can old scheme for byte to byte compatibility */ - - bool newOrderScheme = isSymLink; + so we can use old scheme for byte to byte compatibility */ + const bool newOrderScheme = isSymLink; // newOrderScheme = false; - if (!newOrderScheme) - totalNamesSize += 2 * 2; + totalNamesSize += 2 * 2; // we use NULL terminators in old scheme. const size_t size = 8 + 8 + (isSymLink ? 4 : 0) + totalNamesSize; - if (size != (UInt16)size) - return false; + if (size >= k_Link_Size_Limit) + return; dest.Alloc(size); memset(dest, 0, size); const UInt32 tag = isSymLink ? @@ -189,6 +253,7 @@ bool FillLinkData(CByteBuffer &dest, const wchar_t *path, bool isSymLink, bool i Z7_WIN_IO_REPARSE_TAG_MOUNT_POINT; Byte *p = dest; Set32(p, tag) + // Set32(p + 4, (UInt32)(size - 8)) Set16(p + 4, (UInt16)(size - 8)) Set16(p + 6, 0) p += 8; @@ -204,21 +269,16 @@ bool FillLinkData(CByteBuffer &dest, const wchar_t *path, bool isSymLink, bool i Set16(p + 2, (UInt16)len1) Set16(p + 4, (UInt16)printOffs) Set16(p + 6, (UInt16)len2) - p += 8; if (isSymLink) { - UInt32 flags = isAbs ? 0 : Z7_WIN_SYMLINK_FLAG_RELATIVE; + const UInt32 flags = isAbs ? 
0 : Z7_WIN_SYMLINK_FLAG_RELATIVE; Set32(p, flags) p += 4; } - - if (add_Prefix_Len != 0) - WriteString(p + subOffs, k_LinkPrefix); - WriteString(p + subOffs + add_Prefix_Len * 2, path); + WriteString(p + subOffs, subs); if (needPrintName) WriteString(p + printOffs, path); - return true; } #endif // defined(_WIN32) && !defined(UNDER_CE) @@ -230,7 +290,7 @@ static void GetString(const Byte *p, unsigned len, UString &res) unsigned i; for (i = 0; i < len; i++) { - wchar_t c = Get16(p + i * 2); + const wchar_t c = Get16(p + (size_t)i * 2); if (c == 0) break; s[i] = c; @@ -239,6 +299,7 @@ static void GetString(const Byte *p, unsigned len, UString &res) res.ReleaseBuf_SetLen(i); } + bool CReparseAttr::Parse(const Byte *p, size_t size) { ErrorCode = (DWORD)ERROR_INVALID_REPARSE_DATA; @@ -250,7 +311,12 @@ bool CReparseAttr::Parse(const Byte *p, size_t size) return false; Tag = Get32(p); if (Get16(p + 6) != 0) // padding - return false; + { + // DOCs: Reserved : the field SHOULD be set to 0 + // and MUST be ignored (by parser). + // Win10 ignores it. 
+ MinorError = true; // optional + } unsigned len = Get16(p + 4); p += 8; size -= 8; @@ -262,8 +328,6 @@ bool CReparseAttr::Parse(const Byte *p, size_t size) (type & kReparseFlags_Microsoft) == 0 || (type & 0xFFFF) != 3) */ - - HeaderError = false; if ( Tag != Z7_WIN_IO_REPARSE_TAG_MOUNT_POINT @@ -282,8 +346,7 @@ bool CReparseAttr::Parse(const Byte *p, size_t size) { if (len < 4) return false; - Flags = Get32(p); // maybe it's not Flags - if (Flags != Z7_WIN_LX_SYMLINK_FLAG) + if (Get32(p) != Z7_WIN_LX_SYMLINK_VERSION_2) return false; len -= 4; p += 4; @@ -291,12 +354,13 @@ bool CReparseAttr::Parse(const Byte *p, size_t size) unsigned i; for (i = 0; i < len; i++) { - char c = (char)p[i]; + const char c = (char)p[i]; s[i] = c; if (c == 0) break; } - WslName.ReleaseBuf_SetEnd(i); + s[i] = 0; + WslName.ReleaseBuf_SetLen(i); MinorError = (i != len); ErrorCode = 0; return true; @@ -304,10 +368,10 @@ bool CReparseAttr::Parse(const Byte *p, size_t size) if (len < 8) return false; - unsigned subOffs = Get16(p); - unsigned subLen = Get16(p + 2); - unsigned printOffs = Get16(p + 4); - unsigned printLen = Get16(p + 6); + const unsigned subOffs = Get16(p); + const unsigned subLen = Get16(p + 2); + const unsigned printOffs = Get16(p + 4); + const unsigned printLen = Get16(p + 6); len -= 8; p += 8; @@ -335,15 +399,17 @@ bool CReparseAttr::Parse(const Byte *p, size_t size) bool CReparseShortInfo::Parse(const Byte *p, size_t size) { - const Byte *start = p; - Offset= 0; + const Byte * const start = p; + Offset = 0; Size = 0; if (size < 8) return false; - UInt32 Tag = Get32(p); + const UInt32 Tag = Get32(p); UInt32 len = Get16(p + 4); + /* if (len + 8 > size) return false; + */ /* if ((type & kReparseFlags_Alias) == 0 || (type & kReparseFlags_Microsoft) == 0 || @@ -353,16 +419,14 @@ bool CReparseShortInfo::Parse(const Byte *p, size_t size) Tag != Z7_WIN_IO_REPARSE_TAG_SYMLINK) // return true; return false; - + /* if (Get16(p + 6) != 0) // padding return false; - + */ p += 8; size 
-= 8; - if (len != size) // do we need that check? return false; - if (len < 8) return false; unsigned subOffs = Get16(p); @@ -396,10 +460,14 @@ bool CReparseAttr::IsOkNamePair() const { if (IsLinkPrefix(SubsName)) { + if (PrintName == GetPath()) + return true; +/* if (!IsDrivePath(SubsName.Ptr(k_LinkPrefix_Size))) return PrintName.IsEmpty(); if (wcscmp(SubsName.Ptr(k_LinkPrefix_Size), PrintName) == 0) return true; +*/ } return wcscmp(SubsName, PrintName) == 0; } @@ -415,21 +483,26 @@ bool CReparseAttr::IsVolume() const UString CReparseAttr::GetPath() const { + UString s (SubsName); if (IsSymLink_WSL()) { - UString u; // if (CheckUTF8(attr.WslName) - if (!ConvertUTF8ToUnicode(WslName, u)) - MultiByteToUnicodeString2(u, WslName); - return u; + if (!ConvertUTF8ToUnicode(WslName, s)) + MultiByteToUnicodeString2(s, WslName); } - - UString s (SubsName); - if (IsLinkPrefix(s)) + else if (IsLinkPrefix(s)) { - s.ReplaceOneCharAtPos(1, '\\'); // we normalize prefix from "\??\" to "\\?\" - if (IsDrivePath(s.Ptr(k_LinkPrefix_Size))) - s.DeleteFrontal(k_LinkPrefix_Size); + if (IsString1PrefixedByString2_NoCase_Ascii(s.Ptr(), k_LinkPrefix_UNC)) + { + s.DeleteFrontal(6); + s.ReplaceOneCharAtPos(0, '\\'); + } + else + { + s.ReplaceOneCharAtPos(1, '\\'); // we normalize prefix from "\??\" to "\\?\" + if (IsDrivePath(s.Ptr(k_LinkPrefix_Size))) + s.DeleteFrontal(k_LinkPrefix_Size); + } } return s; } @@ -468,7 +541,7 @@ bool GetReparseData(CFSTR path, CByteBuffer &reparseData, BY_HANDLE_FILE_INFORMA static bool CreatePrefixDirOfFile(CFSTR path) { FString path2 (path); - int pos = path2.ReverseFind_PathSepar(); + const int pos = path2.ReverseFind_PathSepar(); if (pos < 0) return true; #ifdef _WIN32 @@ -494,6 +567,8 @@ static bool OutIoReparseData(DWORD controlCode, CFSTR path, void *data, DWORD si } +// MOUNT_POINT (Junction Point) and LX_SYMLINK (WSL) can be written without administrator rights. +// SYMLINK requires administrator rights. 
// If there is Reparse data already, it still writes new Reparse data bool SetReparseData(CFSTR path, bool isDir, const void *data, DWORD size) { @@ -540,10 +615,11 @@ bool DeleteReparseData(CFSTR path) SetLastError(ERROR_INVALID_REPARSE_DATA); return false; } - BYTE buf[my_REPARSE_DATA_BUFFER_HEADER_SIZE]; - memset(buf, 0, sizeof(buf)); - memcpy(buf, reparseData, 4); // tag - return OutIoReparseData(my_FSCTL_DELETE_REPARSE_POINT, path, buf, sizeof(buf)); + // BYTE buf[my_REPARSE_DATA_BUFFER_HEADER_SIZE]; + // memset(buf, 0, sizeof(buf)); + // memcpy(buf, reparseData, 4); // tag + memset(reparseData + 4, 0, my_REPARSE_DATA_BUFFER_HEADER_SIZE - 4); + return OutIoReparseData(my_FSCTL_DELETE_REPARSE_POINT, path, reparseData, my_REPARSE_DATA_BUFFER_HEADER_SIZE); } } diff --git a/CPP/Windows/FileName.cpp b/CPP/Windows/FileName.cpp index f75c944..180b7bc 100644 --- a/CPP/Windows/FileName.cpp +++ b/CPP/Windows/FileName.cpp @@ -65,8 +65,15 @@ void NormalizeDirPathPrefix(UString &dirPath) dirPath.Add_PathSepar(); } + +#define IS_LETTER_CHAR(c) ((((unsigned)(int)(c) | 0x20) - (unsigned)'a' <= (unsigned)('z' - 'a'))) +bool IsDrivePath (const wchar_t *s) throw() { return IS_LETTER_CHAR(s[0]) && s[1] == ':' && IS_SEPAR(s[2]); } +// bool IsDriveName2(const wchar_t *s) throw() { return IS_LETTER_CHAR(s[0]) && s[1] == ':' && s[2] == 0; } + #ifdef _WIN32 +bool IsDrivePath2(const wchar_t *s) throw() { return IS_LETTER_CHAR(s[0]) && s[1] == ':'; } + #ifndef USE_UNICODE_FSTRING #ifdef Z7_LONG_PATH static void NormalizeDirSeparators(UString &s) @@ -87,13 +94,6 @@ void NormalizeDirSeparators(FString &s) s.ReplaceOneCharAtPos(i, FCHAR_PATH_SEPARATOR); } -#endif - - -#define IS_LETTER_CHAR(c) ((((unsigned)(int)(c) | 0x20) - (unsigned)'a' <= (unsigned)('z' - 'a'))) - -bool IsDrivePath(const wchar_t *s) throw() { return IS_LETTER_CHAR(s[0]) && s[1] == ':' && IS_SEPAR(s[2]); } - bool IsAltPathPrefix(CFSTR s) throw() { unsigned len = MyStringLen(s); @@ -117,16 +117,23 @@ bool 
IsAltPathPrefix(CFSTR s) throw() return true; } -#if defined(_WIN32) && !defined(UNDER_CE) +#endif // _WIN32 -const char * const kSuperPathPrefix = "\\\\?\\"; + +const char * const kSuperPathPrefix = + STRING_PATH_SEPARATOR + STRING_PATH_SEPARATOR "?" + STRING_PATH_SEPARATOR; #ifdef Z7_LONG_PATH -static const char * const kSuperUncPrefix = "\\\\?\\UNC\\"; +static const char * const kSuperUncPrefix = + STRING_PATH_SEPARATOR + STRING_PATH_SEPARATOR "?" + STRING_PATH_SEPARATOR "UNC" + STRING_PATH_SEPARATOR; #endif #define IS_DEVICE_PATH(s) (IS_SEPAR((s)[0]) && IS_SEPAR((s)[1]) && (s)[2] == '.' && IS_SEPAR((s)[3])) #define IS_SUPER_PREFIX(s) (IS_SEPAR((s)[0]) && IS_SEPAR((s)[1]) && (s)[2] == '?' && IS_SEPAR((s)[3])) -#define IS_SUPER_OR_DEVICE_PATH(s) (IS_SEPAR((s)[0]) && IS_SEPAR((s)[1]) && ((s)[2] == '?' || (s)[2] == '.') && IS_SEPAR((s)[3])) #define IS_UNC_WITH_SLASH(s) ( \ ((s)[0] == 'U' || (s)[0] == 'u') \ @@ -134,6 +141,16 @@ static const char * const kSuperUncPrefix = "\\\\?\\UNC\\"; && ((s)[2] == 'C' || (s)[2] == 'c') \ && IS_SEPAR((s)[3])) +static const unsigned kDrivePrefixSize = 3; /* c:\ */ + +bool IsSuperPath(const wchar_t *s) throw(); +bool IsSuperPath(const wchar_t *s) throw() { return IS_SUPER_PREFIX(s); } +// bool IsSuperUncPath(const wchar_t *s) throw() { return (IS_SUPER_PREFIX(s) && IS_UNC_WITH_SLASH(s + kSuperPathPrefixSize)); } + +#if defined(_WIN32) && !defined(UNDER_CE) + +#define IS_SUPER_OR_DEVICE_PATH(s) (IS_SEPAR((s)[0]) && IS_SEPAR((s)[1]) && ((s)[2] == '?' 
|| (s)[2] == '.') && IS_SEPAR((s)[3])) +bool IsSuperOrDevicePath(const wchar_t *s) throw() { return IS_SUPER_OR_DEVICE_PATH(s); } bool IsDevicePath(CFSTR s) throw() { #ifdef UNDER_CE @@ -154,7 +171,7 @@ bool IsDevicePath(CFSTR s) throw() if (!IS_DEVICE_PATH(s)) return false; - unsigned len = MyStringLen(s); + const unsigned len = MyStringLen(s); if (len == 6 && s[5] == ':') return true; if (len < 18 || len > 22 || !IsString1PrefixedByString2(s + kDevicePathPrefixSize, "PhysicalDrive")) @@ -174,7 +191,7 @@ bool IsNetworkPath(CFSTR s) throw() return false; if (IsSuperUncPath(s)) return true; - FChar c = s[2]; + const FChar c = s[2]; return (c != '.' && c != '?'); } @@ -187,7 +204,7 @@ unsigned GetNetworkServerPrefixSize(CFSTR s) throw() prefixSize = kSuperUncPathPrefixSize; else { - FChar c = s[2]; + const FChar c = s[2]; if (c == '.' || c == '?') return 0; } @@ -209,14 +226,6 @@ bool IsNetworkShareRootPath(CFSTR s) throw() return s[(unsigned)pos + 1] == 0; } -static const unsigned kDrivePrefixSize = 3; /* c:\ */ - -bool IsDrivePath2(const wchar_t *s) throw() { return IS_LETTER_CHAR(s[0]) && s[1] == ':'; } -// bool IsDriveName2(const wchar_t *s) throw() { return IS_LETTER_CHAR(s[0]) && s[1] == ':' && s[2] == 0; } -bool IsSuperPath(const wchar_t *s) throw() { return IS_SUPER_PREFIX(s); } -bool IsSuperOrDevicePath(const wchar_t *s) throw() { return IS_SUPER_OR_DEVICE_PATH(s); } -// bool IsSuperUncPath(const wchar_t *s) throw() { return (IS_SUPER_PREFIX(s) && IS_UNC_WITH_SLASH(s + kSuperPathPrefixSize)); } - bool IsAltStreamPrefixWithColon(const UString &s) throw() { if (s.IsEmpty()) @@ -349,14 +358,16 @@ unsigned GetRootPrefixSize(CFSTR s) throw() } #endif // USE_UNICODE_FSTRING +#endif // _WIN32 + static unsigned GetRootPrefixSize_Of_NetworkPath(const wchar_t *s) throw() { // Network path: we look "server\path\" as root prefix - int pos = FindSepar(s); + const int pos = FindSepar(s); if (pos < 0) return 0; - int pos2 = FindSepar(s + (unsigned)pos + 1); + const int 
pos2 = FindSepar(s + (unsigned)pos + 1); if (pos2 < 0) return 0; return (unsigned)(pos + pos2 + 2); @@ -370,7 +381,7 @@ static unsigned GetRootPrefixSize_Of_SimplePath(const wchar_t *s) throw() return 0; if (s[1] == 0 || !IS_SEPAR(s[1])) return 1; - unsigned size = GetRootPrefixSize_Of_NetworkPath(s + 2); + const unsigned size = GetRootPrefixSize_Of_NetworkPath(s + 2); return (size == 0) ? 0 : 2 + size; } @@ -378,17 +389,21 @@ static unsigned GetRootPrefixSize_Of_SuperPath(const wchar_t *s) throw() { if (IS_UNC_WITH_SLASH(s + kSuperPathPrefixSize)) { - unsigned size = GetRootPrefixSize_Of_NetworkPath(s + kSuperUncPathPrefixSize); + const unsigned size = GetRootPrefixSize_Of_NetworkPath(s + kSuperUncPathPrefixSize); return (size == 0) ? 0 : kSuperUncPathPrefixSize + size; } // we support \\?\c:\ paths and volume GUID paths \\?\Volume{GUID}\" - int pos = FindSepar(s + kSuperPathPrefixSize); + const int pos = FindSepar(s + kSuperPathPrefixSize); if (pos < 0) return 0; return kSuperPathPrefixSize + (unsigned)(pos + 1); } +#ifdef _WIN32 unsigned GetRootPrefixSize(const wchar_t *s) throw() +#else +unsigned GetRootPrefixSize_WINDOWS(const wchar_t *s) throw() +#endif { if (IS_DEVICE_PATH(s)) return kDevicePathPrefixSize; @@ -397,7 +412,7 @@ unsigned GetRootPrefixSize(const wchar_t *s) throw() return GetRootPrefixSize_Of_SimplePath(s); } -#else // _WIN32 +#ifndef _WIN32 bool IsAbsolutePath(const wchar_t *s) throw() { return IS_SEPAR(s[0]); } diff --git a/CPP/Windows/FileName.h b/CPP/Windows/FileName.h index d0e9dc4..b91a436 100644 --- a/CPP/Windows/FileName.h +++ b/CPP/Windows/FileName.h @@ -25,13 +25,13 @@ bool IsDrivePath(const wchar_t *s) throw(); // first 3 chars are drive chars li bool IsAltPathPrefix(CFSTR s) throw(); /* name: */ -#if defined(_WIN32) && !defined(UNDER_CE) - extern const char * const kSuperPathPrefix; /* \\?\ */ const unsigned kDevicePathPrefixSize = 4; const unsigned kSuperPathPrefixSize = 4; const unsigned kSuperUncPathPrefixSize = 
kSuperPathPrefixSize + 4; +#if defined(_WIN32) && !defined(UNDER_CE) + bool IsDevicePath(CFSTR s) throw(); /* \\.\ */ bool IsSuperUncPath(CFSTR s) throw(); /* \\?\UNC\ */ bool IsNetworkPath(CFSTR s) throw(); /* \\?\UNC\ or \\SERVER */ @@ -86,6 +86,15 @@ int FindAltStreamColon(CFSTR path) throw(); bool IsAbsolutePath(const wchar_t *s) throw(); unsigned GetRootPrefixSize(const wchar_t *s) throw(); +#ifndef _WIN32 +/* GetRootPrefixSize_WINDOWS() is called in linux, but it parses path by windows rules. + It supports only paths with system (linux) slash separators (STRING_PATH_SEPARATOR). + It doesn't parse paths with backslash (windows) separators. + "c:/dir/file" is supported. +*/ +unsigned GetRootPrefixSize_WINDOWS(const wchar_t *s) throw(); +#endif + #ifdef Z7_LONG_PATH const int kSuperPathType_UseOnlyMain = 0; diff --git a/CPP/Windows/System.cpp b/CPP/Windows/System.cpp index 540aa40..4bdc9a5 100644 --- a/CPP/Windows/System.cpp +++ b/CPP/Windows/System.cpp @@ -25,6 +25,69 @@ namespace NSystem { #ifdef _WIN32 +/* +note: returned value in 32-bit version can be limited by value 32. + while 64-bit version returns full value. +GetMaximumProcessorCount(groupNumber) can return higher value than +GetActiveProcessorCount(groupNumber) in some cases, because CPUs can be added.
+*/ +// typedef DWORD (WINAPI *Func_GetMaximumProcessorCount)(WORD GroupNumber); +typedef DWORD (WINAPI *Func_GetActiveProcessorCount)(WORD GroupNumber); +typedef WORD (WINAPI *Func_GetActiveProcessorGroupCount)(VOID); +/* +#if 0 && defined(ALL_PROCESSOR_GROUPS) +#define MY_ALL_PROCESSOR_GROUPS ALL_PROCESSOR_GROUPS +#else +#define MY_ALL_PROCESSOR_GROUPS 0xffff +#endif +*/ + +Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION + +bool CCpuGroups::Load() +{ + NumThreadsTotal = 0; + GroupSizes.Clear(); + const HMODULE hmodule = ::GetModuleHandleA("kernel32.dll"); + // Is_Win11_Groups = GetProcAddress(hmodule, "SetThreadSelectedCpuSetMasks") != NULL; + const + Func_GetActiveProcessorGroupCount + fn_GetActiveProcessorGroupCount = Z7_GET_PROC_ADDRESS( + Func_GetActiveProcessorGroupCount, hmodule, + "GetActiveProcessorGroupCount"); + const + Func_GetActiveProcessorCount + fn_GetActiveProcessorCount = Z7_GET_PROC_ADDRESS( + Func_GetActiveProcessorCount, hmodule, + "GetActiveProcessorCount"); + if (!fn_GetActiveProcessorGroupCount || + !fn_GetActiveProcessorCount) + return false; + + const unsigned numGroups = fn_GetActiveProcessorGroupCount(); + if (numGroups == 0) + return false; + UInt32 sum = 0; + for (unsigned i = 0; i < numGroups; i++) + { + const UInt32 num = fn_GetActiveProcessorCount((WORD)i); + /* + if (num == 0) + { + // it means error + // but is it possible that some group is empty by some reason? 
+ // GroupSizes.Clear(); + // return false; + } + */ + sum += num; + GroupSizes.Add(num); + } + NumThreadsTotal = sum; + // NumThreadsTotal = fn_GetActiveProcessorCount(MY_ALL_PROCESSOR_GROUPS); + return true; +} + UInt32 CountAffinity(DWORD_PTR mask) { UInt32 num = 0; @@ -38,31 +101,62 @@ UInt32 CountAffinity(DWORD_PTR mask) BOOL CProcessAffinity::Get() { - #ifndef UNDER_CE - return GetProcessAffinityMask(GetCurrentProcess(), &processAffinityMask, &systemAffinityMask); - #else - return FALSE; - #endif + IsGroupMode = false; + Groups.Load(); + // SetThreadAffinityMask(GetCurrentThread(), 1); + // SetProcessAffinityMask(GetCurrentProcess(), 1); + BOOL res = GetProcessAffinityMask(GetCurrentProcess(), + &processAffinityMask, &systemAffinityMask); + /* DOCs: On a system with more than 64 processors, if the threads + of the calling process are in a single processor group, the + function sets the variables pointed to by lpProcessAffinityMask + and lpSystemAffinityMask to the process affinity mask and the + processor mask of active logical processors for that group. + If the calling process contains threads in multiple groups, + the function returns zero for both affinity masks + + note: tested in Win10: GetProcessAffinityMask() doesn't return 0 + in (processAffinityMask) and (systemAffinityMask) masks. + We need to test it in Win11: how to get mask==0 from GetProcessAffinityMask()? + */ + if (!res) + { + processAffinityMask = 0; + systemAffinityMask = 0; + } + if (Groups.GroupSizes.Size() > 1 && Groups.NumThreadsTotal) + if (// !res || + processAffinityMask == 0 || // to support case described in DOCs and for (!res) case + processAffinityMask == systemAffinityMask) // for default nonchanged affinity + { + // we set IsGroupMode only if processAffinity is default (not changed). 
+ res = TRUE; + IsGroupMode = true; + } + return res; } +UInt32 CProcessAffinity::Load_and_GetNumberOfThreads() +{ + if (Get()) + { + const UInt32 numProcessors = GetNumProcessThreads(); + if (numProcessors) + return numProcessors; + } + SYSTEM_INFO systemInfo; + GetSystemInfo(&systemInfo); + // the number of logical processors in the current group + return systemInfo.dwNumberOfProcessors; +} + UInt32 GetNumberOfProcessors() { // We need to know how many threads we can use. // By default the process is assigned to one group. - // So we get the number of logical processors (threads) - // assigned to current process in the current group. - // Group size can be smaller than total number logical processors, for exammple, 2x36 - CProcessAffinity pa; - - if (pa.Get() && pa.processAffinityMask != 0) - return pa.GetNumProcessThreads(); - - SYSTEM_INFO systemInfo; - GetSystemInfo(&systemInfo); - // the number of logical processors in the current group - return (UInt32)systemInfo.dwNumberOfProcessors; + return pa.Load_and_GetNumberOfThreads(); } #else diff --git a/CPP/Windows/System.h b/CPP/Windows/System.h index c79b010..7a7fa1b 100644 --- a/CPP/Windows/System.h +++ b/CPP/Windows/System.h @@ -9,6 +9,7 @@ #endif #include "../Common/MyTypes.h" +#include "../Common/MyVector.h" #include "../Common/MyWindows.h" namespace NWindows { @@ -16,6 +17,34 @@ namespace NSystem { #ifdef _WIN32 +struct CCpuGroups +{ + CRecordVector<UInt32> GroupSizes; + UInt32 NumThreadsTotal; // sum of threads in all groups + // bool Is_Win11_Groups; // useless + + void Get_GroupSize_Min_Max(UInt32 &minSize, UInt32 &maxSize) const + { + unsigned num = GroupSizes.Size(); + UInt32 minSize2 = 0, maxSize2 = 0; + if (num) + { + minSize2 = (UInt32)0 - 1; + do + { + const UInt32 v = GroupSizes[--num]; + if (minSize2 > v) minSize2 = v; + if (maxSize2 < v) maxSize2 = v; + } + while (num); + } + minSize = minSize2; + maxSize = maxSize2; + } + bool Load(); + CCpuGroups(): NumThreadsTotal(0) {} +}; + UInt32
CountAffinity(DWORD_PTR mask); struct CProcessAffinity @@ -25,14 +54,28 @@ struct CProcessAffinity DWORD_PTR processAffinityMask; DWORD_PTR systemAffinityMask; + CCpuGroups Groups; + bool IsGroupMode; + /* + IsGroupMode == true, if + (Groups.GroupSizes.Size() > 1) && { default affinity was not changed } + IsGroupMode == false, if single group or affinity was changed + */ + + UInt32 Load_and_GetNumberOfThreads(); + void InitST() { // numProcessThreads = 1; // numSysThreads = 1; processAffinityMask = 1; systemAffinityMask = 1; + IsGroupMode = false; + // Groups.NumThreadsTotal = 0; + // Groups.Is_Win11_Groups = false; } +/* void CpuZero() { processAffinityMask = 0; @@ -42,9 +85,23 @@ struct CProcessAffinity { processAffinityMask |= ((DWORD_PTR)1 << cpuIndex); } +*/ - UInt32 GetNumProcessThreads() const { return CountAffinity(processAffinityMask); } - UInt32 GetNumSystemThreads() const { return CountAffinity(systemAffinityMask); } + UInt32 GetNumProcessThreads() const + { + if (IsGroupMode) + return Groups.NumThreadsTotal; + // IsGroupMode == false + // so we don't want to use groups + // we return number of threads in default primary group: + return CountAffinity(processAffinityMask); + } + UInt32 GetNumSystemThreads() const + { + if (Groups.GroupSizes.Size() > 1 && Groups.NumThreadsTotal) + return Groups.NumThreadsTotal; + return CountAffinity(systemAffinityMask); + } BOOL Get(); diff --git a/CPP/Windows/Thread.h b/CPP/Windows/Thread.h index 013e5fd..9e662fe 100644 --- a/CPP/Windows/Thread.h +++ b/CPP/Windows/Thread.h @@ -26,8 +26,10 @@ public: { return Thread_Create_With_Affinity(&thread, startAddress, param, affinity); } WRes Create_With_CpuSet(THREAD_FUNC_TYPE startAddress, LPVOID param, const CCpuSet *cpuSet) { return Thread_Create_With_CpuSet(&thread, startAddress, param, cpuSet); } - - #ifdef _WIN32 + +#ifdef _WIN32 + WRes Create_With_Group(THREAD_FUNC_TYPE startAddress, LPVOID param, unsigned group, CAffinityMask affinity = 0) + { return
Thread_Create_With_Group(&thread, startAddress, param, group, affinity); } operator HANDLE() { return thread; } void Attach(HANDLE handle) { thread = handle; } HANDLE Detach() { HANDLE h = thread; thread = NULL; return h; } @@ -36,7 +38,7 @@ public: bool Terminate(DWORD exitCode) { return BOOLToBool(::TerminateThread(thread, exitCode)); } int GetPriority() { return ::GetThreadPriority(thread); } bool SetPriority(int priority) { return BOOLToBool(::SetThreadPriority(thread, priority)); } - #endif +#endif }; } diff --git a/CPP/Windows/TimeUtils.cpp b/CPP/Windows/TimeUtils.cpp index 04b230f..3bfc173 100644 --- a/CPP/Windows/TimeUtils.cpp +++ b/CPP/Windows/TimeUtils.cpp @@ -258,8 +258,9 @@ bool GetSecondsSince1601(unsigned year, unsigned month, unsigned day, FreeBSD 11.0, NetBSD 7.1, OpenBSD 6.0, Minix 3.1.8, AIX 7.1, HP-UX 11.31, IRIX 6.5, Solaris 11.3, Cygwin 2.9, mingw, MSVC 14, Android 9.0. + Android NDK defines TIME_UTC but doesn't have the timespec_get(). */ -#if defined(TIME_UTC) +#if defined(TIME_UTC) && !defined(__ANDROID__) #define ZIP7_USE_timespec_get // #pragma message("ZIP7_USE_timespec_get") #elif defined(CLOCK_REALTIME) diff --git a/DOC/lzma-history.txt b/DOC/lzma-history.txt index e7b689f..96da8bf 100644 --- a/DOC/lzma-history.txt +++ b/DOC/lzma-history.txt @@ -1,6 +1,23 @@ HISTORY of the LZMA SDK ----------------------- +25.01 2025-08-03 +------------------------- +- The code for handling symbolic links has been changed + to provide greater security when extracting files from archives. + Command line switch -snld20 can be used to bypass default security + checks when creating symbolic links. + + +25.00 2025-07-05 +------------------------- +- 7-Zip for Windows can now use more than 64 CPU threads for compression + to zip/7z/xz archives and for the 7-Zip benchmark. + If there is more than one processor group in Windows (on systems with more than + 64 CPU threads), 7-Zip distributes running CPU threads across different processor groups. 
+- fixed some bugs and vulnerabilities. + + 24.09 2024-11-29 ------------------------- - The default dictionary size values for LZMA/LZMA2 compression methods were increased: diff --git a/DOC/lzma-sdk.txt b/DOC/lzma-sdk.txt index 822cddf..57279bf 100644 --- a/DOC/lzma-sdk.txt +++ b/DOC/lzma-sdk.txt @@ -1,4 +1,4 @@ -LZMA SDK 24.09 +LZMA SDK 25.01 -------------- LZMA SDK provides the documentation, samples, header files, @@ -59,6 +59,34 @@ LZMA SDK Contents - console programs for lzma / 7z / xz compression and decompression - SFX modules for installers. +How to compile with makefile in Windows +--------------------------------------- + +Some macro names can be defined for compiling with makefile: + +PLATFORM + with possible values: x64, x86, arm64, arm, ia64 + +OLD_COMPILER + for old VC compiler, like MSVC 6.0. + +MY_DYNAMIC_LINK + for dynamic linking to the run-time library (msvcrt.dll). + The default makefile option is static linking to the run-time library. + +To compile 7zr.exe file for x64 with Visual Studio 2022, +use the following command sequence: + + cd SRC\CPP\7zip\Bundles\Alone7z\ + %comspec% /k "C:\Program Files\VS2022\VC\Auxiliary\Build\vcvars64.bat" + nmake + +You can use other "vcvars*.bat" files from the "VS2022\VC\Auxiliary\Build" directory +to compile for other platforms: + vcvars64.bat + vcvarsamd64_arm64.bat + vcvarsamd64_x86.bat + UNIX/Linux version ------------------ diff --git a/README.OpenSource b/README.OpenSource index acfe7b8..3ccb075 100644 --- a/README.OpenSource +++ b/README.OpenSource @@ -3,9 +3,9 @@ "Name": "7 Zip - LZMA SDK", "License": "Public domain", "License File": "LICENSE", - "Version Number": "24.09", + "Version Number": "25.01", "Owner": "zangleizhen@huawei.com", - "Upstream URL": "https://7-zip.org/a/lzma2409.7z", + "Upstream URL": "https://7-zip.org/a/lzma2501.7z", "Description": "LZMA is default and general compression method of 7z and xz format." } ]