From d32d232e05ae242c59744c541da07fbb6a923910 Mon Sep 17 00:00:00 2001 From: Roland van Laar Date: Sun, 22 Aug 2021 00:31:10 +0200 Subject: [PATCH] DEVTOOLS: DUMPERCOMPANION: handle all filenames Always punyencode filenames when the filename contains a char that should be escaped. This makes it work better with Windows. Windows has a short list of chars that are not allowed: https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions --- devtools/dumper-companion.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/devtools/dumper-companion.py b/devtools/dumper-companion.py index f73d078fd27..4bed0cc2891 100755 --- a/devtools/dumper-companion.py +++ b/devtools/dumper-companion.py @@ -163,6 +163,14 @@ def file_to_macbin(f: machfs.File, name: ByteString) -> bytes: def escape_string(s: str) -> str: + """ + Escape strings + + Escape the following: + - escape char: \x81 + - unallowed filename chars: https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions + - control chars < 0x20 + """ new_name = "" for char in s: if char == "\x81": @@ -174,6 +182,14 @@ def escape_string(s: str) -> str: return new_name +def needs_punyencoding(orig: str) -> bool: + """ + Filenames need punyencoding when it contains a char that should be + escaped. 
+ """ + return orig != escape_string(orig) + + def punyencode(orig: str) -> str: s = escape_string(orig) encoded = s.encode("punycode").decode("ascii") @@ -219,8 +235,9 @@ def extract_volume(args: argparse.Namespace) -> int: for el in hpath: if japanese: el = decode_macjapanese(el.encode("mac_roman")) - if punify: + if punify or needs_punyencoding(el): el = punyencode(el) + upath /= el if isinstance(obj, machfs.Folder): @@ -454,6 +471,12 @@ def test_decode_name(): assert punyencode(input) == expected +def test_needs_punyencoding(): + checks = [["Icon\r", True], ["ascii", False], ["バッドデイ(Power PC)", False]] + for input, expected in checks: + assert needs_punyencoding(input) == expected + + def test_escape_string(): checks = [["\r", "\x81\x8d"], ["\x81", "\x81\x79"]] for input, expected in checks: