Bug 1835431 - Handle hardlinks when unpacking a SDK. r=gsvelto

Differential Revision: https://phabricator.services.mozilla.com/D179281
Mike Hommey 2023-05-31 22:24:34 +00:00
parent 59070c7a5d
commit c6281d6814
4 changed files with 37 additions and 9 deletions
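
For context (not part of the patch): hardlinked files share a single inode, so they can be recognized by a link count above 1 together with a matching (device, inode) pair, which is exactly the information the cpio headers below start exposing. A minimal, standalone sketch on a POSIX filesystem:

import os
import stat
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    first = os.path.join(tmp, "first")
    second = os.path.join(tmp, "second")
    with open(first, "w") as f:
        f.write("data")
    os.link(first, second)  # create a hardlink to the same inode

    st1, st2 = os.stat(first), os.stat(second)
    assert stat.S_ISREG(st1.st_mode)
    assert st1.st_nlink == 2
    assert (st1.st_dev, st1.st_ino) == (st2.st_dev, st2.st_ino)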

View File

@@ -42,7 +42,7 @@ def show_package_content(url, digest=None, size=None):
     # sys.exit(1)
     for name, content in unxar(BytesIO(package)):
         if name == "Payload":
-            for path, mode, _ in uncpio(Pbzx(content)):
+            for path, _, __ in uncpio(Pbzx(content)):
                 if path:
                     print(path.decode("utf-8"))

View File

@@ -14,6 +14,8 @@ import struct
 import zlib
 from xml.etree.ElementTree import XML

+from mozbuild.util import ReadOnlyNamespace
+

 class ZlibFile(object):
     def __init__(self, fileobj):
@@ -179,6 +181,8 @@ def uncpio(fileobj):
             namesize,
             filesize,
         ) = struct.unpack(">6s6s6s6s6s6s6s11s6s11s", header)
+        dev = int(dev, 8)
+        ino = int(ino, 8)
         mode = int(mode, 8)
         nlink = int(nlink, 8)
         namesize = int(namesize, 8)
@@ -197,7 +201,7 @@ def uncpio(fileobj):
         if name.startswith(b"/"):
             name = name[1:]
         content = Take(fileobj, filesize)
-        yield name, mode, content
+        yield name, ReadOnlyNamespace(mode=mode, nlink=nlink, dev=dev, ino=ino), content
         # Ensure the content is totally consumed
         while content.read(4096):
             pass
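
For illustration only (not from the patch): callers of uncpio() now receive a small attribute namespace instead of a bare mode integer. A rough sketch of the new iteration contract, where list_regular_files is a hypothetical helper:

import stat

from mozpack.macpkg import Pbzx, uncpio


def list_regular_files(fileobj):
    # The second yielded value now carries mode, nlink, dev and ino.
    for path, st, content in uncpio(Pbzx(fileobj)):
        if path and stat.S_ISREG(st.mode):
            yield path.decode(), st.nlink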

View File

@@ -8,6 +8,7 @@ import shutil
 import stat
 import sys
 import tempfile
+from io import BytesIO
 from urllib.request import urlopen

 from mozpack.macpkg import Pbzx, uncpio, unxar
@@ -36,27 +37,50 @@ def unpack_sdk(url, sha256, extract_prefix, out_dir="."):
 def extract_payload(fileobj, extract_prefix, out_dir="."):
-    for path, mode, content in uncpio(Pbzx(fileobj)):
+    hardlinks = {}
+    for path, st, content in uncpio(Pbzx(fileobj)):
+        # When there are hardlinks, normally a cpio stream is supposed to
+        # contain the data for all of them, but, even with compression, that
+        # can be a waste of space, so in some cpio streams (*cough* *cough*,
+        # Apple's, e.g. in Xcode), the files after the first one have dummy
+        # data.
+        # As we may be filtering the first file out (if it doesn't match
+        # extract_prefix), we need to keep its data around (we're not going
+        # to be able to rewind).
+        # We could do something fancy in the case where the first file is not
+        # filtered out, but in practice, it's not worth the extra complexity.
+        if stat.S_ISREG(st.mode) and st.nlink > 1:
+            key = (st.dev, st.ino)
+            hardlink = hardlinks.get(key)
+            if hardlink:
+                hardlink[0] -= 1
+                if hardlink[0] == 0:
+                    del hardlinks[key]
+            else:
+                hardlink = hardlinks[key] = [st.nlink - 1, BytesIO(content.read())]
+            content = hardlink[1]
+            content.seek(0)
         if not path:
             continue
         path = path.decode()
         if not path.startswith(extract_prefix):
             continue
         path = os.path.join(out_dir, path[len(extract_prefix) :].lstrip("/"))
-        if stat.S_ISDIR(mode):
+        if stat.S_ISDIR(st.mode):
             os.makedirs(path, exist_ok=True)
         else:
             parent = os.path.dirname(path)
             if parent:
                 os.makedirs(parent, exist_ok=True)
-            if stat.S_ISLNK(mode):
+            if stat.S_ISLNK(st.mode):
                 os.symlink(content.read(), path)
-            elif stat.S_ISREG(mode):
+            elif stat.S_ISREG(st.mode):
                 with open(path, "wb") as out:
                     shutil.copyfileobj(content, out)
             else:
-                raise Exception(f"File mode {mode:o} is not supported")
+                raise Exception(f"File mode {st.mode:o} is not supported")


 if __name__ == "__main__":
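
To make the hardlink bookkeeping above concrete, here is a toy walk-through with made-up entries (the names, key and data are assumptions, not taken from an actual stream): only the first of three linked entries carries real data, and the cached BytesIO is replayed for the other two until the link count is exhausted.

from io import BytesIO

entries = [
    ("a", 3, b"real data"),  # first link: real contents in the stream
    ("b", 3, b""),           # later links: dummy/empty data
    ("c", 3, b""),
]

hardlinks = {}
key = ("dev0", "ino0")  # all three entries share the same (dev, ino)
for name, nlink, data in entries:
    hardlink = hardlinks.get(key)
    if hardlink:
        hardlink[0] -= 1
        if hardlink[0] == 0:
            del hardlinks[key]
    else:
        hardlink = hardlinks[key] = [nlink - 1, BytesIO(data)]
    content = hardlink[1]
    content.seek(0)
    print(name, content.read())  # every entry prints b"real data"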

View File

@@ -181,8 +181,8 @@ def extract_payload(payload_path, output_path):
     elif header == b"pb":
         logging.info("Extracting pbzx payload")
-        for path, mode, content in uncpio(Pbzx(open(payload_path, "rb"))):
-            if not path or not stat.S_ISREG(mode):
+        for path, st, content in uncpio(Pbzx(open(payload_path, "rb"))):
+            if not path or not stat.S_ISREG(st.mode):
                 continue
             out = os.path.join(output_path, path.decode())
             os.makedirs(os.path.dirname(out), exist_ok=True)