# scummvm/devtools/dumper-companion.py

#!/usr/bin/env python3
#
# prerequisites: pip3 install machfs
#
# Development information:
# This file contains tests. They can be run with:
#  $ pytest dumper-companion.py
#
# Code is formatted with black

import argparse
import io
import os
import re
import subprocess
import sys
from binascii import crc_hqx
from pathlib import Path
from struct import pack
from typing import Any, List, Tuple

import machfs


def file_to_macbin(f: "machfs.File", name: str, encoding: str) -> bytes:
    """Serialise a file and both of its forks into a MacBinary container.

    The result is a 128-byte header (CRC included) followed by the data
    fork and then the resource fork, each zero-padded up to the next
    multiple of 128 bytes. Empty forks contribute nothing.

    Args:
        f: object exposing ``flags``, ``type``, ``creator``, ``locked``,
            ``data``, ``rsrc``, ``crdate`` and ``mddate``.
        name: file name to store in the header.
        encoding: codec used to turn ``name`` into header bytes.

    Returns:
        The complete MacBinary byte string.
    """
    encoded_name = name.encode(encoding)
    # The 16-bit Finder flags are split over two header bytes: the high
    # byte sits in the original slot, the low byte in a later one.
    old_flags = (f.flags >> 8) & 0xFF
    new_flags = f.flags & 0xFF
    macbin = pack(
        ">xB63s4s4sBxHHHBxIIIIHB14xIHBB",
        # Length of the *encoded* name; the name field holds at most 63
        # bytes ("63s" truncates), so the length byte is clamped to match.
        min(len(encoded_name), 63),
        encoded_name,
        f.type,
        f.creator,
        old_flags,
        0,  # window position / folder id fields are left zeroed
        0,
        0,
        f.locked,
        len(f.data),
        len(f.rsrc),
        f.crdate,  # TODO: dates are wrong, investigate
        f.mddate,  # TODO: dates are wrong, investigate
        0,
        new_flags,
        0,
        0,
        129,  # presumably the MacBinary II version markers; verify against spec
        129,
    )
    # The CRC covers the 124 header bytes built so far; two padding bytes
    # bring the header to exactly 128 bytes.
    macbin += pack(">H2x", crc_hqx(macbin, 0))
    if f.data:
        macbin += f.data
        # ``-n % 128`` is 0 when the fork is already block-aligned, so no
        # spurious extra block of zeroes is appended.
        macbin += b"\x00" * (-len(f.data) % 128)

    if f.rsrc:
        macbin += f.rsrc
        macbin += b"\x00" * (-len(f.rsrc) % 128)

    return macbin


def escape_string(s: str) -> str:
    """Escape characters that must not appear verbatim in a file name.

    Control characters (code point below 0x20) and the characters
    ``/":*[]+|\\?%<>,;=`` are replaced by ``"\\x81"`` followed by the
    character shifted up by 0x80. Everything else is copied through.

    NOTE(review): the check for ``"\\x81"`` is independent of the check
    that follows it, so a literal ``"\\x81"`` yields ``"\\x81\\x79"``
    *and* the character itself (``"\\x81\\x79\\x81"``). The test suite in
    this file pins that output, so it is kept as is.
    """
    reserved = frozenset('/":*[]+|\\?%<>,;=')
    pieces = []
    for ch in s:
        if ch == "\x81":
            pieces.append("\x81\x79")
        if ch in reserved or ord(ch) < 0x20:
            pieces.append("\x81" + chr(0x80 + ord(ch)))
        else:
            pieces.append(ch)
    return "".join(pieces)


def punyencode(orig: str, encoding: str = "mac_roman") -> str:
    """Return the punycode form of *orig*, or *orig* itself if unchanged.

    The name is first round-tripped through ``mac_roman`` bytes and
    decoded with *encoding*, then escaped with ``escape_string`` and
    punycode-encoded. Names that needed encoding are returned with an
    ``xn--`` prefix.
    """
    reinterpreted = orig.encode("mac_roman").decode(encoding)
    puny = escape_string(reinterpreted).encode("punycode").decode("ascii")
    # The punycode codec appends a lone "-" when there was nothing to
    # encode, so that trailing character is ignored when checking whether
    # the name came through untouched.
    if orig == puny[:-1]:
        return orig
    return "xn--" + puny


def encode_string(args: argparse.Namespace) -> None:
    """Handle the ``str`` sub-command: punyencode one string and print it.

    The input comes from a line on standard input when ``args.stdin`` is
    set, otherwise from ``args.string``.

    Raises:
        SystemExit: when neither a string nor ``--stdin`` was supplied.
    """
    if args.stdin:
        # --stdin takes precedence over a positional string.
        var = input()
    elif args.string is not None:
        # ``is not None`` so that an explicitly passed empty string is
        # still treated as input.
        var = args.string
    else:
        raise SystemExit("error: provide a STRING argument or --stdin")
    print(punyencode(var))


def generate_punyencoded_path(
    destination_dir: Path, encoding: str, hpath: Tuple[str]
) -> Path:
    """Build the output path for *hpath* below *destination_dir*.

    Every component of *hpath* is passed through ``punyencode`` with the
    given *encoding* and appended, in order, to *destination_dir*.
    """
    encoded_parts = (punyencode(part, encoding=encoding) for part in hpath)
    return destination_dir.joinpath(*encoded_parts)


def extract_volume(args: argparse.Namespace) -> None:
    """Handle the ``iso`` sub-command: dump a volume image to a directory.

    Reads the image at ``args.src`` with ``machfs``, then recreates its
    tree under ``args.dir`` using punyencoded names. Files with a
    non-empty resource fork are written as MacBinary; all other files are
    written as their raw data fork. ``args.e`` is the name encoding.
    """
    source_volume: Path = args.src
    destination_dir: Path = args.dir
    encoding: str = args.e

    print(f"Loading {source_volume} ...")
    volume = machfs.Volume()
    volume.read(source_volume.read_bytes())

    destination_dir.mkdir(parents=True, exist_ok=True)

    for hfs_path, entry in volume.iter_paths():
        target = generate_punyencoded_path(destination_dir, encoding, hfs_path)

        if isinstance(entry, machfs.Folder):
            target.mkdir(exist_ok=True)
            continue

        print(target)
        # Only wrap the file in MacBinary when there is a resource fork
        # to preserve.
        payload = (
            file_to_macbin(entry, hfs_path[-1], encoding=encoding)
            if entry.rsrc
            else entry.data
        )
        target.write_bytes(payload)


def has_resource_fork(dirpath: str, filename: str) -> bool:
    """
    Tell whether ``<dirpath>/<filename>/..namedfork/rsrc`` exists.

    Kept as a helper for ease of compatibility between macOS and linux.
    """
    fork_path = os.path.join(dirpath, filename, "..namedfork/rsrc")
    return os.path.exists(fork_path)


def _read_type_and_creator(filepath: str) -> Tuple[bytes, bytes]:
    """Return the (type, creator) codes that ``GetFileInfo`` reports."""
    # Pass the path as an argument list rather than through a shell so
    # quotes or other special characters in file names cannot break or
    # alter the command.
    output = subprocess.run(
        ["GetFileInfo", filepath],
        stdout=subprocess.PIPE,
        universal_newlines=True,
    ).stdout

    match = re.search(r"type: \"(.*)\"\ncreator: \"(.*)\"", output)
    if match is None:
        raise RuntimeError(f"GetFileInfo returned no type/creator for {filepath}")

    # We may have here "\0\0\0\0" as literal text, hence the
    # unicode-escape round trip.
    file_type = match.group(1).encode().decode("unicode-escape").encode()
    creator = match.group(2).encode().decode("unicode-escape").encode()
    return file_type, creator


def collect_forks(args: argparse.Namespace) -> None:
    """
    Collect resource forks and move them to a macbinary file

    - combine them with the data fork when it's available
    - punyencode the filename when requested

    Walks ``args.dir`` recursively. ``args.punycode`` enables renaming;
    in that case files without a resource fork are renamed as well.
    Prints a summary of converted and renamed files at the end.

    Raises:
        RuntimeError: when ``GetFileInfo`` output cannot be parsed.
    """
    directory: Path = args.dir
    punify: bool = args.punycode
    count_resources = 0
    count_renames = 0
    for dirpath, _, filenames in os.walk(directory):
        for filename in filenames:
            # Always work with the full path: the walk may be anywhere
            # below ``directory``, not in the current working directory.
            filepath = os.path.join(dirpath, filename)

            if has_resource_fork(dirpath, filename):
                print(f"Resource in {filename}")
                count_resources += 1
                to_filename = filename
                if punify:
                    to_filename = punyencode(filename)
                    if to_filename != filename:
                        print(f"Renamed {filename} to {to_filename}")
                        count_renames += 1
                to_filepath = os.path.join(dirpath, to_filename)

                file = machfs.File()

                # Set the file times and convert them to Mac epoch
                info = os.stat(filepath)
                file.crdate = 2082844800 + int(info.st_birthtime)
                file.mddate = 2082844800 + int(info.st_mtime)

                # Get info on creator and type
                file.type, file.creator = _read_type_and_creator(filepath)

                # Both forks are read fully before the output is opened,
                # because the output may be the very same file.
                with open(os.path.join(filepath, "..namedfork/rsrc"), "rb") as rsrc:
                    file.rsrc = rsrc.read()
                with open(filepath, "rb") as data:
                    file.data = data.read()
                with open(to_filepath, "wb") as to_file:
                    to_file.write(
                        file_to_macbin(file, to_filename, encoding="mac_roman")
                    )

                if to_filename != filename:
                    os.remove(filepath)  # Remove the original file

                # Restore the timestamps only after the output has been
                # closed; a later flush would otherwise bump the mtime.
                os.utime(to_filepath, (info.st_mtime, info.st_mtime))
            elif punify:
                punified_filename = punyencode(filename)
                if punified_filename != filename:
                    print(f"Renamed {filename} to {punified_filename}")
                    count_renames += 1
                    os.rename(
                        filepath,
                        os.path.join(dirpath, punified_filename),
                    )

    print(f"Macbinary {count_resources}, Renamed {count_renames} files")


def generate_parser() -> argparse.ArgumentParser:
    """
    Generate the parser

    The parser is split into multiple subparsers.
    One for each mode we support.

    Each subparser has a default function that handles that mode; it is
    stored on the parsed namespace as ``func``. The selected mode's name
    is stored as ``command``. The ``mac`` mode is only registered when
    running on macOS.
    """
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest="command")
    # Require a sub-command: without one the namespace has no ``func``
    # and callers would crash with an AttributeError. Setting the
    # attribute (instead of passing ``required=``) also works on Python
    # versions that predate that keyword.
    subparsers.required = True

    parser_iso = subparsers.add_parser("iso", help="Dump hfs isos")

    parser_iso.add_argument(
        "-e",
        metavar="ENCODING",
        type=str,
        default="mac_roman",
        help="String encoding (see https://docs.python.org/3/library/codecs.html#standard-encodings)",
    )
    parser_iso.add_argument("src", metavar="INPUT", type=Path, help="Disk image")
    parser_iso.add_argument(
        "dir", metavar="OUTPUT", type=Path, help="Destination folder"
    )
    parser_iso.set_defaults(func=extract_volume)

    parser_str = subparsers.add_parser("str", help="Punyencode strings or standard in")
    parser_str.add_argument(
        "--stdin", action="store_true", help="Convert stdin to punycode"
    )
    parser_str.add_argument(
        "string",
        metavar="STRING",
        type=str,
        help="Convert string to punycode",
        nargs="?",
    )
    parser_str.set_defaults(func=encode_string)

    # Resource forks are only reachable through the macOS file system.
    if sys.platform == "darwin":
        parser_macbinary = subparsers.add_parser(
            "mac",
            help="MacOS only: Operate in MacBinary encoding mode. Recursively encode all resource forks in the current directory",
        )
        parser_macbinary.add_argument(
            "--punycode", action="store_true", help="encode pathnames into punycode"
        )
        parser_macbinary.add_argument(
            "dir", metavar="directory", type=Path, help="input directory"
        )
        parser_macbinary.set_defaults(func=collect_forks)

    return parser


if __name__ == "__main__":
    # Script entry point: parse the command line, then hand the parsed
    # namespace to the handler the chosen sub-command stored as ``func``.
    cli_args = generate_parser().parse_args()
    cli_args.func(cli_args)

### Test functions


def call_test_parser(input_args: List[str]) -> Any:
    """Parse *input_args* with a fresh parser and run the chosen handler."""
    parsed = generate_parser().parse_args(input_args)
    parsed.func(parsed)


def test_encode_string(capsys):
    """The ``str`` mode punyencodes a positional string onto stdout."""
    call_test_parser(["str", "Icon\r"])
    stdout = capsys.readouterr().out
    assert stdout == "xn--Icon-ja6e\n"


def test_encode_stdin(capsys, monkeypatch):
    """The ``str --stdin`` mode punyencodes a line read from stdin."""
    fake_stdin = io.StringIO("Icon\r")
    monkeypatch.setattr("sys.stdin", fake_stdin)
    call_test_parser(["str", "--stdin"])
    assert capsys.readouterr().out == "xn--Icon-ja6e\n"


def test_decode_name():
    """``punyencode`` maps each known raw name to its expected output."""
    cases = (("Icon\r", "xn--Icon-ja6e"),)
    for raw, expected in cases:
        assert punyencode(raw, "mac_roman") == expected


def test_escape_string():
    """``escape_string`` produces the expected escape sequences."""
    cases = (("\r", "\x81\x8d"), ("\x81", "\x81\x79\x81"))
    for raw, expected in cases:
        assert escape_string(raw) == expected