scummvm/devtools/dumper-companion.py

297 lines
8.9 KiB
Python
Executable File

#!/usr/bin/env python3
#
# prerequisites: pip3 install machfs
#
# Development information:
# This file contains tests. They can be run with:
# $ pytest dumper-companion.py
#
# Code is formatted with black
import argparse
import io
import os
import sys
import re
from binascii import crc_hqx
from pathlib import Path
from struct import pack
from typing import Any, List, Tuple
import machfs
def file_to_macbin(f: "machfs.File", name: str, encoding: str) -> bytes:
    """
    Serialise a file (both forks plus Finder metadata) as a MacBinary stream

    The result is a 128 byte header (124 packed bytes, a CRC-16 of those
    bytes and two zero bytes) followed by the data fork and then the
    resource fork. Each non-empty fork is zero padded up to the next
    multiple of 128 bytes.

    f        -- object providing flags, type, creator, locked, data, rsrc,
                crdate and mddate
    name     -- file name to store in the header
    encoding -- codec used to turn `name` into the bytes stored in the header
    """
    # The name field is a length byte followed by 63 bytes. The length has to
    # count encoded bytes (not characters) and must not exceed the field;
    # pack() already cuts the name itself down to 63 bytes.
    encoded_name = name.encode(encoding)[:63]
    # The 16 bit Finder flags are split over two header bytes: high byte
    # first, low byte further down the header.
    old_flags = (f.flags >> 8) & 0xFF
    new_flags = f.flags & 0xFF
    header = pack(
        ">xB63s4s4sBxHHHBxIIIIHB14xIHBB",
        len(encoded_name),
        encoded_name,
        f.type,
        f.creator,
        old_flags,
        0,
        0,
        0,
        f.locked,
        len(f.data),
        len(f.rsrc),
        f.crdate,  # TODO: dates are wrong, investigate
        f.mddate,  # TODO: dates are wrong, investigate
        0,
        new_flags,
        0,
        0,
        129,
        129,
    )
    parts = [header, pack(">H2x", crc_hqx(header, 0))]
    for fork in (f.data, f.rsrc):
        if fork:
            parts.append(fork)
            # -len % 128 is 0 for an already aligned fork, so no spurious
            # extra block of zeros is appended
            parts.append(b"\x00" * (-len(fork) % 128))
    return b"".join(parts)
def escape_string(s: str) -> str:
    """
    Escape characters that are not wanted in the output file names

    - the escape character "\\x81" gets "\\x81\\x79" put in front of it
    - the characters /":*[]+|\\?%<>,;= and everything below 0x20 are replaced
      by "\\x81" followed by the character shifted up by 0x80
    - everything else is copied unchanged
    """
    pieces = []
    for ch in s:
        if ch == "\x81":
            pieces.append("\x81\x79")
            # NOTE(review): the escape character is then also copied verbatim
            # by the statement below, giving "\x81\x79\x81". This looks like a
            # missing `elif`, but test_escape_string pins this exact output --
            # confirm against the decoder before changing it.
        must_escape = ord(ch) < 0x20 or ch in '/":*[]+|\\?%<>,;='
        pieces.append("\x81" + chr(0x80 + ord(ch)) if must_escape else ch)
    return "".join(pieces)
def punyencode(orig: str, encoding: str = "mac_roman") -> str:
    """
    Return the punycode form of a file name, or the name itself when plain

    `orig` is turned back into bytes with mac_roman and re-read using
    `encoding`, escaped with escape_string() and then punycode encoded.
    Names that need no encoding are returned unchanged, all others are
    returned with an "xn--" prefix.
    """
    reinterpreted = orig.encode("mac_roman").decode(encoding)
    puny = escape_string(reinterpreted).encode("punycode").decode("ascii")
    # The punycode codec appends a '-' when there was nothing to encode, so
    # the last character is left out of the comparison
    return orig if orig == puny[:-1] else "xn--" + puny
def encode_string(args: argparse.Namespace) -> None:
    """
    Print the punyencoded form of a string

    The string is read from standard input when `args.stdin` is set
    (this wins when both are given), otherwise `args.string` is used.
    Exits with an error message when neither source was supplied instead
    of failing on an unbound variable.
    """
    if args.stdin:
        var = input()
    elif args.string is not None:
        var = args.string
    else:
        sys.exit("error: provide a STRING argument or pass --stdin")
    print(punyencode(var))
def generate_punyencoded_path(
    destination_dir: Path, encoding: str, hpath: Tuple[str]
) -> Path:
    """Build the output path: destination_dir plus every punyencoded element of hpath"""
    encoded_parts = (punyencode(part, encoding=encoding) for part in hpath)
    return destination_dir.joinpath(*encoded_parts)
def extract_volume(args: argparse.Namespace) -> None:
    """
    Extract every folder and file of an HFS volume into a directory

    Reads `args.src` (disk image), `args.dir` (destination folder) and
    `args.e` (encoding used for the names on the volume). Output names are
    punyencoded. Files with a resource fork are written as MacBinary,
    files without one are written as their plain data fork.
    """
    source_volume: Path = args.src
    destination_dir: Path = args.dir
    encoding: str = args.e
    print(f"Loading {source_volume} ...")
    vol = machfs.Volume()
    vol.read(source_volume.read_bytes())
    destination_dir.mkdir(parents=True, exist_ok=True)
    for hpath, obj in vol.iter_paths():
        upath = generate_punyencoded_path(destination_dir, encoding, hpath)
        if isinstance(obj, machfs.Folder):
            upath.mkdir(exist_ok=True)
            continue
        print(upath)
        file = obj.data
        if obj.rsrc:
            # The names in hpath are the raw name bytes read as mac_roman
            # (punyencode() undoes exactly that before applying `encoding`).
            # Encoding them with mac_roman again stores the original bytes in
            # the MacBinary header; using `encoding` here would fail or store
            # different bytes for anything but mac_roman.
            file = file_to_macbin(obj, hpath[-1], encoding="mac_roman")
        upath.write_bytes(file)
def has_resource_fork(dirpath: str, filename: str) -> bool:
    """
    Tell whether "<dirpath>/<filename>/..namedfork/rsrc" exists

    Going through the path instead of a macOS specific call keeps this
    usable on both macOS and linux.
    """
    fork_path = os.path.join(dirpath, filename, "..namedfork/rsrc")
    return os.path.exists(fork_path)
def collect_forks(args: argparse.Namespace) -> None:
    """
    Collect resource forks and move them to a macbinary file
    - combine them with the data fork when it's available
    - punyencode the filename when requested

    Walks `args.dir` recursively. `args.punycode` enables renaming. All file
    operations use the path inside the directory being walked, so the result
    does not depend on the current working directory.

    Raises RuntimeError when the type/creator of a file cannot be read
    from the GetFileInfo output.
    """
    # Local import: only this macOS-only mode needs it
    import subprocess

    directory: Path = args.dir
    punify: bool = args.punycode
    count_resources = 0
    count_renames = 0
    for dirpath, _, filenames in os.walk(directory):
        for filename in filenames:
            source_path = os.path.join(dirpath, filename)
            if has_resource_fork(dirpath, filename):
                print(f"Resource in {filename}")
                count_resources += 1
                to_filename = filename
                if punify:
                    tmp = punyencode(to_filename)
                    if tmp != to_filename:
                        print(f"Renamed {to_filename} to {tmp}")
                        count_renames += 1
                        to_filename = tmp
                target_path = os.path.join(dirpath, to_filename)

                file = machfs.File()
                # Set the file times and convert them to Mac epoch
                # (2082844800 seconds lie between 1904-01-01 and 1970-01-01)
                info = os.stat(source_path)
                file.crdate = 2082844800 + int(info.st_birthtime)
                file.mddate = 2082844800 + int(info.st_mtime)

                # Get info on creator and type. The argument list is passed
                # without a shell so quotes or other special characters in
                # the file name cannot break or alter the command.
                finder_info = subprocess.run(
                    ["GetFileInfo", source_path],
                    stdout=subprocess.PIPE,
                    universal_newlines=True,
                ).stdout
                groups = re.search(
                    r"type: \"(.*)\"\ncreator: \"(.*)\"", finder_info
                )
                if groups is None:
                    raise RuntimeError(
                        f"Could not read type and creator of {source_path}"
                    )
                # We may have here "\0\0\0\0"
                file.type = (
                    groups.group(1).encode().decode("unicode-escape").encode()
                )
                file.creator = (
                    groups.group(2).encode().decode("unicode-escape").encode()
                )

                with open(os.path.join(source_path, "..namedfork/rsrc"), "rb") as rsrc:
                    file.rsrc = rsrc.read()
                with open(source_path, "rb") as data:
                    file.data = data.read()
                with open(target_path, "wb") as to_file:
                    to_file.write(
                        file_to_macbin(file, to_filename, encoding="mac_roman")
                    )

                if to_filename != filename:
                    os.remove(source_path)  # Remove the original file

                # Restore the original modification time once the new file is
                # completely written
                os.utime(target_path, (info.st_mtime, info.st_mtime))
            elif punify:
                punified_filename = punyencode(filename)
                if punified_filename != filename:
                    print(f"Renamed {filename} to {punified_filename}")
                    count_renames += 1
                    os.rename(
                        source_path,
                        os.path.join(dirpath, punified_filename),
                    )
    print(f"Macbinary {count_resources}, Renamed {count_renames} files")
def generate_parser() -> argparse.ArgumentParser:
    """
    Build the command line parser

    Every supported mode gets its own subparser ("iso", "str" and, on
    macOS only, "mac"). Each subparser stores the function implementing
    its mode as the default `func`.
    """
    root = argparse.ArgumentParser()
    modes = root.add_subparsers()

    # iso: dump an HFS disk image
    iso_mode = modes.add_parser("iso", help="Dump hfs isos")
    iso_mode.set_defaults(func=extract_volume)
    iso_mode.add_argument(
        "-e",
        metavar="ENCODING",
        type=str,
        default="mac_roman",
        help="String encoding (see https://docs.python.org/3/library/codecs.html#standard-encodings)",
    )
    iso_mode.add_argument("src", metavar="INPUT", type=Path, help="Disk image")
    iso_mode.add_argument("dir", metavar="OUTPUT", type=Path, help="Destination folder")

    # str: punyencode a single string
    str_mode = modes.add_parser("str", help="Punyencode strings or standard in")
    str_mode.set_defaults(func=encode_string)
    str_mode.add_argument(
        "--stdin", action="store_true", help="Convert stdin to punycode"
    )
    str_mode.add_argument(
        "string",
        metavar="STRING",
        type=str,
        help="Convert string to punycode",
        nargs="?",
    )

    if sys.platform != "darwin":
        return root

    # mac: only offered when running on macOS
    mac_mode = modes.add_parser(
        "mac",
        help="MacOS only: Operate in MacBinary encoding mode. Recursively encode all resource forks in the current directory",
    )
    mac_mode.set_defaults(func=collect_forks)
    mac_mode.add_argument(
        "--punycode", action="store_true", help="encode pathnames into punycode"
    )
    mac_mode.add_argument(
        "dir", metavar="directory", type=Path, help="input directory"
    )
    return root
if __name__ == "__main__":
    parser = generate_parser()
    args = parser.parse_args()
    # Without a sub-command no `func` default is set; report a usage error
    # instead of failing with an AttributeError.
    if not hasattr(args, "func"):
        parser.error("no mode given, see --help for the available modes")
    args.func(args)
### Test functions
def call_test_parser(input_args: List[str]) -> Any:
    """Parse `input_args` with a fresh parser and run the selected mode"""
    parsed = generate_parser().parse_args(input_args)
    parsed.func(parsed)
def test_encode_string(capsys):
    """The "str" mode prints the punyencoded form of its argument"""
    call_test_parser(["str", "Icon\r"])
    printed = capsys.readouterr().out
    assert printed == "xn--Icon-ja6e\n"
def test_encode_stdin(capsys, monkeypatch):
    """The "str" mode with --stdin punyencodes what it reads from stdin"""
    fake_stdin = io.StringIO("Icon\r")
    monkeypatch.setattr("sys.stdin", fake_stdin)
    call_test_parser(["str", "--stdin"])
    printed = capsys.readouterr().out
    assert printed == "xn--Icon-ja6e\n"
def test_decode_name():
    """punyencode() gives the expected name for each known input"""
    checks = [["Icon\r", "xn--Icon-ja6e"]]
    for name, expected in checks:
        assert punyencode(name, "mac_roman") == expected
def test_escape_string():
    """escape_string() gives the expected output for each known input"""
    checks = [["\r", "\x81\x8d"], ["\x81", "\x81\x79\x81"]]
    for raw, expected in checks:
        assert escape_string(raw) == expected