mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-24 13:21:05 +00:00
Bug 1761150: Mach should use fresh system python, don't inherit sys.path r=ahal
Previously, when using the system Python packages, Mach would reuse the values already existing in the `sys.path`. This had two benefits: 1. We didn't have to do work to calculate which paths the "system Python" specifically referred to. 2. This allowed us to support nested Mach calls (such as `./mach --virtualenv psutil ./mach build`). However, it came with its own headaches, specifically around "consistent imports" and Python subprocesses, such as in the following example 1. Mach runs "using system Python" 2. The "build" site (for example) would be activated. The current `sys.path` is included in the virtualenv's `mach.pth` file so that subprocesses will have access to the same packages as the primary Mach process 3. //Something// adds something to the `sys.path` 4. The build virtualenv is redundantly re-activated (such as due to generic setup code for some module). Though we don't physically re-activate the virtualenv, we //do// check that it's up-to-date to validate assumptions - **and it's not!** It's missing the //something// that was added to the `sys.path`. 5. We can't re-create the build virtualenv because it's already active, so: 6. An error is raised so that unexpected out-of-date-ness doesn't fly under the radar. ----- This patch solves this by calculating the "system Python" paths in a deterministic way. Then, even when using the system Python, the Mach and command sites use define themselves in a consistent way. This means that we can't do `./mach --virtualenv <venv> ./mach ...` to shares `venv`'s packages with the whole new Mach process. Fortunately, this has been replaced with moving such packages into the nested Mach command's requirements definition instead. TL;DR: more consistent, less failures. 👍 ----- One more detail, this time around the `sys_path()` function: Ideally, we could calculate the standard library paths *and* the "system" paths with only one Python subprocess. Unfortunately, that's not the case: * If `-S` isn't provided, then it's tricky to separate the standard library paths from the system paths * If `-S` is provided, then doing `site.getsitepackages()` in a virtualenv returns the *system* site packages, not the virtualenv's. To work around this, we call the external python twice, and in parallel. This allows resolving the information we need while avoiding performance costs. Differential Revision: https://phabricator.services.mozilla.com/D143200
This commit is contained in:
parent
7f0d8e4745
commit
15510f9f26
@ -17,7 +17,6 @@ import shutil
|
||||
import site
|
||||
import subprocess
|
||||
import sys
|
||||
from collections import OrderedDict
|
||||
import sysconfig
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
@ -320,12 +319,8 @@ class MachSiteManager:
|
||||
if self._site_packages_source == SitePackagesSource.NONE:
|
||||
return SiteUpToDateResult(True)
|
||||
elif self._site_packages_source == SitePackagesSource.SYSTEM:
|
||||
pthfile_lines = [
|
||||
*self._requirements.pths_as_absolute(self._topsrcdir),
|
||||
*sys.path,
|
||||
]
|
||||
_assert_pip_check(
|
||||
self._topsrcdir, pthfile_lines, "mach", self._requirements
|
||||
self._topsrcdir, self._sys_path(), "mach", self._requirements
|
||||
)
|
||||
return SiteUpToDateResult(True)
|
||||
elif self._site_packages_source == SitePackagesSource.VENV:
|
||||
@ -367,35 +362,17 @@ class MachSiteManager:
|
||||
if self._site_packages_source == SitePackagesSource.VENV
|
||||
else sys.executable,
|
||||
):
|
||||
if self._site_packages_source == SitePackagesSource.SYSTEM:
|
||||
# Add our Mach modules to the sys.path.
|
||||
# Note that "[0:0]" is used to ensure that Mach's modules are prioritized
|
||||
# over the system modules. Since Mach-env activation happens so early in
|
||||
# the Mach lifecycle, we can assume that no system packages have been
|
||||
# imported yet, and this is a safe operation to do.
|
||||
sys.path[0:0] = self._requirements.pths_as_absolute(self._topsrcdir)
|
||||
|
||||
# Remove duplicates (most likely to creep in during nested Mach calls
|
||||
# that use the "system" python).
|
||||
sys.path = list(OrderedDict.fromkeys(sys.path))
|
||||
elif self._site_packages_source == SitePackagesSource.NONE:
|
||||
# Since the system packages aren't used, clean the sys.path
|
||||
# down to just the standard library.
|
||||
sys.path = list(self._metadata.original_python.stdlib_paths())
|
||||
sys.path[0:0] = self._requirements.pths_as_absolute(self._topsrcdir)
|
||||
elif self._site_packages_source == SitePackagesSource.VENV:
|
||||
# Don't activate Mach virtualenv if this Python process was already
|
||||
# started from the Mach virtualenv.
|
||||
if Path(sys.prefix) != Path(self._metadata.prefix):
|
||||
# Since the system packages aren't used, clean the sys.path
|
||||
# down to just the standard library.
|
||||
sys.path = list(self._metadata.original_python.stdlib_paths())
|
||||
|
||||
# Activate the Mach virtualenv in the current Python context. This
|
||||
# automatically adds the virtualenv's "site-packages" to our scope, in
|
||||
# addition to our first-party/vendored modules since they're specified
|
||||
# in the "mach.pth" file.
|
||||
activate_virtualenv(self._virtualenv())
|
||||
# Reset the sys.path to insulate ourselves from the environment.
|
||||
# This should be safe to do, since activation of the Mach site happens so
|
||||
# early in the Mach lifecycle that no packages should have been imported
|
||||
# from external sources yet.
|
||||
sys.path = self._sys_path()
|
||||
if self._site_packages_source == SitePackagesSource.VENV:
|
||||
# Activate the Mach virtualenv in the current Python context. This
|
||||
# automatically adds the virtualenv's "site-packages" to our scope, in
|
||||
# addition to our first-party/vendored modules since they're specified
|
||||
# in the "mach.pth" file.
|
||||
activate_virtualenv(self._virtualenv())
|
||||
|
||||
def _build(self):
|
||||
if self._site_packages_source != SitePackagesSource.VENV:
|
||||
@ -413,6 +390,27 @@ class MachSiteManager:
|
||||
self._metadata,
|
||||
)
|
||||
|
||||
def _sys_path(self):
|
||||
if self._site_packages_source == SitePackagesSource.SYSTEM:
|
||||
stdlib_paths, system_site_paths = self._metadata.original_python.sys_path()
|
||||
return [
|
||||
*stdlib_paths,
|
||||
*self._requirements.pths_as_absolute(self._topsrcdir),
|
||||
*system_site_paths,
|
||||
]
|
||||
elif self._site_packages_source == SitePackagesSource.NONE:
|
||||
stdlib_paths = self._metadata.original_python.sys_path_stdlib()
|
||||
return [
|
||||
*stdlib_paths,
|
||||
*self._requirements.pths_as_absolute(self._topsrcdir),
|
||||
]
|
||||
elif self._site_packages_source == SitePackagesSource.VENV:
|
||||
stdlib_paths = self._metadata.original_python.sys_path_stdlib()
|
||||
return [
|
||||
*stdlib_paths,
|
||||
# self._requirements will be added as part of the virtualenv activation.
|
||||
]
|
||||
|
||||
def _pthfile_lines(self, environment):
|
||||
return [
|
||||
# Prioritize vendored and first-party modules first.
|
||||
@ -702,28 +700,8 @@ class CommandSiteManager:
|
||||
mach_site_packages_source = self._mach_site_packages_source
|
||||
if mach_site_packages_source == SitePackagesSource.SYSTEM:
|
||||
# When Mach is using the system environment, add it next.
|
||||
stdlib_paths = self._metadata.original_python.stdlib_paths()
|
||||
system_sys_path = [p for p in sys.path if p not in stdlib_paths]
|
||||
|
||||
# When a virtualenv is activated, it implicitly adds some paths to the
|
||||
# sys.path. When this function is run from such an activated virtualenv,
|
||||
# we don't want to include its paths in the pthfile because they'd be
|
||||
# redundant - so, scrub them.
|
||||
# Note that some platforms include just a site's $site-packages-dir to the
|
||||
# sys.path, while other platforms (such as Windows) add the $prefix as well.
|
||||
# We can catch these cases by matching all paths that start with $prefix.
|
||||
prefix_normalized = os.path.normcase(
|
||||
os.path.normpath(self._virtualenv.prefix)
|
||||
)
|
||||
system_sys_path = [
|
||||
p
|
||||
for p in system_sys_path
|
||||
if not os.path.normcase(os.path.normpath(p)).startswith(
|
||||
prefix_normalized
|
||||
)
|
||||
]
|
||||
|
||||
lines.extend(system_sys_path)
|
||||
_, system_site_paths = self._metadata.original_python.sys_path()
|
||||
lines.extend(system_site_paths)
|
||||
elif mach_site_packages_source == SitePackagesSource.VENV:
|
||||
# When Mach is using its on-disk virtualenv, add its site-packages directory.
|
||||
assert self._mach_virtualenv_root
|
||||
@ -738,9 +716,6 @@ class CommandSiteManager:
|
||||
# source to import from.
|
||||
lines.extend(_deprioritize_venv_packages(self._virtualenv))
|
||||
|
||||
# De-duplicate
|
||||
lines = list(OrderedDict.fromkeys(lines))
|
||||
|
||||
# Note that an on-disk virtualenv is always created for commands, even if they
|
||||
# are using the system as their site-packages source. This is to support use
|
||||
# cases where a fresh Python process must be created, but it also must have
|
||||
@ -804,10 +779,13 @@ class PythonVirtualenv:
|
||||
return os.path.join(normalized_venv_root, relative_path)
|
||||
|
||||
def site_packages_dirs(self):
|
||||
return [
|
||||
self.resolve_sysconfig_packages_path("purelib"),
|
||||
self.resolve_sysconfig_packages_path("platlib"),
|
||||
]
|
||||
# De-dupe with set literal in case purelib and platlib are identical
|
||||
return list(
|
||||
{
|
||||
self.resolve_sysconfig_packages_path("purelib"),
|
||||
self.resolve_sysconfig_packages_path("platlib"),
|
||||
}
|
||||
)
|
||||
|
||||
def pip_install_with_constraints(self, pip_args):
|
||||
"""Create a pip constraints file or existing packages
|
||||
@ -935,19 +913,64 @@ class ExternalPythonSite:
|
||||
self.python_path = python_executable
|
||||
|
||||
@functools.lru_cache(maxsize=None)
|
||||
def stdlib_paths(self):
|
||||
stdlib_paths = subprocess.check_output(
|
||||
def sys_path(self):
|
||||
"""Return lists of sys.path entries: one for standard library, one for the site
|
||||
|
||||
These two lists are calculated at the same time so that we can interpret them
|
||||
in a single Python subprocess, as running a whole Python instance is
|
||||
very expensive in the context of Mach initialization.
|
||||
"""
|
||||
env = {
|
||||
k: v
|
||||
for k, v in os.environ.items()
|
||||
# Don't include items injected by IDEs into the system path.
|
||||
if k not in ("PYTHONPATH", "PYDEVD_LOAD_VALUES_ASYNC")
|
||||
}
|
||||
stdlib = subprocess.Popen(
|
||||
[
|
||||
self.python_path,
|
||||
# Don't "import site", so we don't include system/user pip-installed
|
||||
# packages.
|
||||
# Don't "import site" right away, so we can split the standard library
|
||||
# paths from the site paths.
|
||||
"-S",
|
||||
"-c",
|
||||
"import sys; print(sys.path)",
|
||||
"import sys; from collections import OrderedDict; "
|
||||
# Skip the first item in the sys.path, as it's the working directory
|
||||
# of the invoked script (so, in this case, "").
|
||||
# Use list(OrderectDict...) to de-dupe items, such as when using
|
||||
# pyenv on Linux.
|
||||
"print(list(OrderedDict.fromkeys(sys.path[1:])))",
|
||||
],
|
||||
universal_newlines=True,
|
||||
env=env,
|
||||
stdout=subprocess.PIPE,
|
||||
)
|
||||
return ast.literal_eval(stdlib_paths)
|
||||
system = subprocess.Popen(
|
||||
[
|
||||
self.python_path,
|
||||
"-c",
|
||||
"import sys; import site; " "packages = site.getsitepackages(); "
|
||||
# Only add the "user site packages" if not in a virtualenv (which is
|
||||
# identified by the prefix == base_prefix check
|
||||
"packages.insert(0, site.getusersitepackages()) if "
|
||||
" sys.prefix == sys.base_prefix else None;"
|
||||
"print(packages)",
|
||||
],
|
||||
universal_newlines=True,
|
||||
env=env,
|
||||
stdout=subprocess.PIPE,
|
||||
)
|
||||
# Run python processes in parallel - they take roughly the same time, so this
|
||||
# cuts this functions run time in half.
|
||||
stdlib_out, _ = stdlib.communicate()
|
||||
system_out, _ = system.communicate()
|
||||
assert stdlib.returncode == 0
|
||||
assert system.returncode == 0
|
||||
return ast.literal_eval(stdlib_out), ast.literal_eval(system_out)
|
||||
|
||||
def sys_path_stdlib(self):
|
||||
"""Return list of default sys.path entries for the standard library"""
|
||||
stdlib, _ = self.sys_path()
|
||||
return stdlib
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
|
||||
|
2
python/sites/lint.txt
Normal file
2
python/sites/lint.txt
Normal file
@ -0,0 +1,2 @@
|
||||
|
||||
|
@ -72,6 +72,7 @@ def get_global_excludes(**lintargs):
|
||||
category="devenv",
|
||||
description="Run linters.",
|
||||
parser=setup_argument_parser,
|
||||
virtualenv_name="lint",
|
||||
)
|
||||
def lint(command_context, *runargs, **lintargs):
|
||||
"""Run linters."""
|
||||
|
Loading…
Reference in New Issue
Block a user