From 70e8dc46bd19c85b6d015a0f2c5082707560d3d1 Mon Sep 17 00:00:00 2001 From: Mitchell Hentges Date: Thu, 10 Mar 2022 20:41:54 +0000 Subject: [PATCH] Bug 1755516: Expose config variable for Mach native dependency source r=ahal Adds support and documentation for the `MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE` environment variable. The "fallback" behaviour here should be backwards-compatible, which will enable us to piece-by-piece explicitly use the `"system"` where needed, then remove the backwards-compatibility shim afterwards. A restriction added by this change is that it's no longer possible to have the combination of: * Have Mach use the system Python, and * Have the active command site do `pip install`. This is because this case shouldn't be necessary (if `pip install` is allowed, why use the system?) and will allow us to enforce that all non-build command sites always use their lockfiles, when they're set up. When referring to behaviour in CI, I've opted to only refer to the upcoming behaviour (of `MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE="none"`) because the current heuristic behaviour feels complex enough that it should be understood by checking the code, rather than docs. Besides, the heuristic will be removed pretty soon (right? ;) Differential Revision: https://phabricator.services.mozilla.com/D138932 --- python/docs/index.rst | 104 ++++++++++- python/mach/mach/site.py | 225 +++++++++++++++--------- python/mach/mach/telemetry_interface.py | 20 ++- python/mach/mach/test/python.ini | 1 + python/mach/mach/test/test_site.py | 54 ++++++ 5 files changed, 313 insertions(+), 91 deletions(-) create mode 100644 python/mach/mach/test/test_site.py diff --git a/python/docs/index.rst b/python/docs/index.rst index 75be21b82bba..5be003b1b88b 100644 --- a/python/docs/index.rst +++ b/python/docs/index.rst @@ -4,7 +4,6 @@ Using third-party Python packages Mach and its associated commands have a variety of 3rd-party Python dependencies. Many of these are vendored in ``third_party/python``, while others are installed at runtime via ``pip``. -Dependencies with "native code" are handled on a machine-by-machine basis. The dependencies of Mach itself can be found at ``build/mach_virtualenv_packages.txt``. Mach commands may have additional dependencies which are specified at ``build/_virtualenv_packages.txt``. @@ -36,6 +35,11 @@ There's two ways of using 3rd-party Python dependencies: * :ref:`Vendor the source of the Python package in-tree `. Dependencies of the Mach core logic or of building Firefox itself must be vendored. +.. note:: + + For dependencies that meet both restrictions (dependency of Mach/build, *and* has + native code), see the :ref:`mach-and-build-native-dependencies` section below. + .. note:: If encountering an ``ImportError``, even after following either of the above techniques, @@ -113,6 +117,104 @@ Next, add that package and any new transitive dependencies (you'll see them adde a Firefox chemspill. Therefore, packages required by Mach core logic or for building Firefox itself must be vendored. +.. _mach-and-build-native-dependencies: + +Mach/Build Native 3rd-party Dependencies +======================================== + +There are cases where Firefox is built without being able to ``pip install``, but where +native 3rd party Python dependencies enable optional functionality. This can't be solved +by vendoring the platform-specific libraries, as then each one would have to be stored +multiple times in-tree according to how many platforms we wish to support. + +Instead, this is solved by pre-installing such native packages onto the host system +in advance, then having Mach attempt to use such packages directly from the system. +This feature is only viable in very specific environments, as the system Python packages +have to be compatible with Mach's vendored packages. + +.. note: + + All of these native build-specific dependencies **MUST** be optional requirements + as to support the "no strings attached" builds that only use vendored packages. + +To control this behaviour, the ``MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE`` environment +variable can be used: + +.. list-table:: ``MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE`` + :header-rows: 1 + + * - ``MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE`` + - Behaviour + * - ``"pip"`` + - Mach will ``pip install`` all needed dependencies from PyPI at runtime into a Python + virtual environment that's reused in future Mach invocations. + * - ``"none"`` + - Mach will perform the build using only vendored packages. No Python virtual environment + will be created for Mach. + * - ``"system"`` + - Mach will use the host system's Python packages as part of doing the build. This option + allows the usage of native Python packages without leaning on a ``pip install`` at + build-time. This is generally slower because the system Python packages have to + be asserted to be compatible with Mach. Additionally, dependency lockfiles are ignored, + so there's higher risk of breakage. Finally, as with ``"none"``, no Python virtualenv + environment is created for Mach. + * - ```` + - Same behaviour as ``"pip"`` if ``MOZ_AUTOMATION`` isn't set. Otherwise, uses + the same behaviour as ``"system"`` if any needed native Python packages can be found in + the system Python. + +There's a couple restrictions here: + +* ``MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE`` only applies to the top-level ``"mach"`` site and the + ``"build"`` site. All other sites will use ``pip install`` at run-time as needed. + +* ``MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE="system"`` is not allowed when using any site other + than ``"mach"`` or ``"build"``, because: + + * As described in :ref:`package-compatibility` below, packages used by Mach are still + in scope when commands are run, and + * The host system is practically guaranteed to be incompatible with commands' dependency + lockfiles. + +The ``MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE`` environment variable fits into the following use +cases: + +Mozilla CI Builds +~~~~~~~~~~~~~~~~~ + +We need access to the native packages of ``zstandard`` and ``psutil`` to extract archives and +get OS information respectively. Use ``MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE="system"``. + +Mozilla CI non-Build Tasks +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We generally don't want to create a Mach virtual environment, but it's ok to ``pip install`` +for specific command sites as needed. Use ``MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE="none"``. + +Downstream CI Builds +~~~~~~~~~~~~~~~~~~~~ + +Sometimes these builds happen in sandboxed, network-less environments, and usually these builds +don't need any of the behaviour enabled by installing native Python dependencies. +Use ``MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE="none"``. + +Gentoo Builds +~~~~~~~~~~~~~ + +When installing Firefox via the package manager, Gentoo generally builds it from source rather than +distributing a compiled binary artifact. Accordingly, users doing a build of Firefox in this +context don't want stray files created in ``~/.mozbuild`` or unnecessary ``pip install`` calls. +Use ``MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE="none"``. + +Firefox Developers +~~~~~~~~~~~~~~~~~~ + +Leave ``MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE`` unset so that all Mach commands can be run, +Python dependency lockfiles are respected, and optional behaviour is enabled by installing +native packages. + +.. _package-compatibility: + Package compatibility ===================== diff --git a/python/mach/mach/site.py b/python/mach/mach/site.py index 71cdcecab37f..8e0f04799ac9 100644 --- a/python/mach/mach/site.py +++ b/python/mach/mach/site.py @@ -50,9 +50,68 @@ class InstallPipRequirementsException(Exception): class SitePackagesSource(enum.Enum): - NONE = enum.auto() - SYSTEM = enum.auto() - VENV = enum.auto() + NONE = "none" + SYSTEM = "system" + VENV = "pip" + + @classmethod + def from_environment(cls, external_python, site_name, requirements): + source = os.environ.get("MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE", "").lower() + if source == "system": + source = SitePackagesSource.SYSTEM + elif source == "none": + source = SitePackagesSource.NONE + elif source == "pip": + source = SitePackagesSource.VENV + elif source: + raise Exception( + "Unexpected MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE value, expected one " + 'of "system", "pip", "none", or to not be set' + ) + + if site_name not in PIP_NETWORK_INSTALL_RESTRICTED_VIRTUALENVS: + if source == SitePackagesSource.SYSTEM: + raise Exception( + 'Cannot use MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE="system" for any ' + f"sites other than {PIP_NETWORK_INSTALL_RESTRICTED_VIRTUALENVS}. The " + f'current attempted site is "{site_name}".' + ) + + return SitePackagesSource.VENV + + mach_use_system_python = bool(os.environ.get("MACH_USE_SYSTEM_PYTHON")) + moz_automation = bool(os.environ.get("MOZ_AUTOMATION")) + + if source: + if mach_use_system_python: + raise Exception( + "The MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE environment variable is " + "set, so the MACH_USE_SYSTEM_PYTHON variable is redundant and " + "should be unset." + ) + return source + + if not (mach_use_system_python or moz_automation): + return SitePackagesSource.VENV + + # Only print this warning once for the Mach site, so we don't spam it every + # time a site handle is created. + if site_name == "mach" and mach_use_system_python: + print( + 'The "MACH_USE_SYSTEM_PYTHON" environment variable is deprecated, ' + "please unset it or replace it with either " + '"MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE=system" or ' + '"MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE=none"' + ) + + if not external_python.has_pip(): + source = SitePackagesSource.NONE + elif external_python.provides_any_package(site_name, requirements): + source = SitePackagesSource.SYSTEM + else: + source = SitePackagesSource.NONE + + return source class MozSiteMetadata: @@ -265,20 +324,10 @@ class MachSiteManager: else: original_python = external_python - if not _system_python_env_variable_present(): - source = SitePackagesSource.VENV - state_dir = get_state_dir() - elif not external_python.has_pip(): - source = SitePackagesSource.NONE - state_dir = None - else: - source = ( - SitePackagesSource.SYSTEM - if external_python.provides_any_package("mach", requirements) - else SitePackagesSource.NONE - ) - state_dir = None - + source = SitePackagesSource.from_environment( + external_python, "mach", requirements + ) + state_dir = get_state_dir() if source == SitePackagesSource.VENV else None return cls( topsrcdir, state_dir, @@ -295,7 +344,9 @@ class MachSiteManager: *self._requirements.pths_as_absolute(self._topsrcdir), *sys.path, ] - _assert_pip_check(self._topsrcdir, pthfile_lines, "mach") + _assert_pip_check( + self._topsrcdir, pthfile_lines, "mach", self._requirements + ) return True elif self._site_packages_source == SitePackagesSource.VENV: environment = self._virtualenv() @@ -478,36 +529,24 @@ class CommandSiteManager: active_metadata ), "A Mach-managed site must be active before doing work with command sites" + external_python = ExternalPythonSite(sys.executable) requirements = resolve_requirements(topsrcdir, site_name) - if ( - not _system_python_env_variable_present() - or site_name not in PIP_NETWORK_INSTALL_RESTRICTED_VIRTUALENVS - ): - source = SitePackagesSource.VENV - else: - external_python = ExternalPythonSite(sys.executable) - if not external_python.has_pip(): - if requirements.pypi_requirements: - raise Exception( - f'The "{site_name}" site requires pip ' - "packages, and Mach has been told to find such pip packages in " - "the system environment, but it can't because the system doesn't " - 'have "pip" installed.' - ) - source = SitePackagesSource.NONE - else: - source = ( - SitePackagesSource.SYSTEM - if external_python.provides_any_package(site_name, requirements) - else SitePackagesSource.NONE - ) + source = SitePackagesSource.from_environment( + external_python, site_name, requirements + ) + if source == SitePackagesSource.NONE and requirements.pypi_requirements: + raise Exception( + f'The "{site_name}" site requires pip ' + "packages, and Mach has been told to find such pip packages in " + "the system environment, but it can't because the system doesn't " + 'have "pip" installed.' + ) checkout_scoped_state_dir = ( get_state_dir() if active_metadata.mach_site_packages_source == SitePackagesSource.VENV else None ) - return cls( topsrcdir, checkout_scoped_state_dir, @@ -718,11 +757,15 @@ class CommandSiteManager: def _up_to_date(self): pthfile_lines = self._pthfile_lines() - if ( - self._site_packages_source == SitePackagesSource.SYSTEM - or self._mach_site_packages_source == SitePackagesSource.SYSTEM - ): - _assert_pip_check(self._topsrcdir, pthfile_lines, self._site_name) + if self._mach_site_packages_source == SitePackagesSource.SYSTEM: + _assert_pip_check( + self._topsrcdir, + pthfile_lines, + self._site_name, + self._requirements + if self._site_packages_source == SitePackagesSource.SYSTEM + else None, + ) return _is_venv_up_to_date( self._topsrcdir, @@ -844,7 +887,7 @@ class PythonVirtualenv: return _resolve_installed_packages(self.python_path) -class ExternalSitePackageValidationResult: +class RequirementsValidationResult: def __init__(self): self._package_discrepancies = [] self.has_all_packages = True @@ -864,6 +907,29 @@ class ExternalSitePackageValidationResult: lines.append(f"{requirement}: {error}") return "\n".join(lines) + @classmethod + def from_packages(cls, packages, requirements): + result = cls() + for pkg in requirements.pypi_requirements: + installed_version = packages.get(pkg.requirement.name) + if not installed_version or not pkg.requirement.specifier.contains( + installed_version + ): + result.add_discrepancy(pkg.requirement, installed_version) + elif installed_version: + result.provides_any_package = True + + for pkg in requirements.pypi_optional_requirements: + installed_version = packages.get(pkg.requirement.name) + if installed_version and not pkg.requirement.specifier.contains( + installed_version + ): + result.add_discrepancy(pkg.requirement, installed_version) + elif installed_version: + result.provides_any_package = True + + return result + class ExternalPythonSite: """Represents the Python site that is executing Mach @@ -905,25 +971,9 @@ class ExternalPythonSite: def provides_any_package(self, virtualenv_name, requirements): system_packages = self._resolve_installed_packages() - result = ExternalSitePackageValidationResult() - for pkg in requirements.pypi_requirements: - installed_version = system_packages.get(pkg.requirement.name) - if not installed_version or not pkg.requirement.specifier.contains( - installed_version - ): - result.add_discrepancy(pkg.requirement, installed_version) - elif installed_version: - result.provides_any_package = True - - for pkg in requirements.pypi_optional_requirements: - installed_version = system_packages.get(pkg.requirement.name) - if installed_version and not pkg.requirement.specifier.contains( - installed_version - ): - result.add_discrepancy(pkg.requirement, installed_version) - elif installed_version: - result.provides_any_package = True - + result = RequirementsValidationResult.from_packages( + system_packages, requirements + ) if not result.has_all_packages: print(result.report()) raise Exception( @@ -976,12 +1026,6 @@ def _virtualenv_py_path(topsrcdir): ) -def _system_python_env_variable_present(): - return any( - os.environ.get(var) for var in ("MACH_USE_SYSTEM_PYTHON", "MOZ_AUTOMATION") - ) - - def _resolve_installed_packages(python_executable): pip_json = subprocess.check_output( [ @@ -1000,11 +1044,14 @@ def _resolve_installed_packages(python_executable): return {package["name"]: package["version"] for package in installed_packages} -def _assert_pip_check(topsrcdir, pthfile_lines, virtualenv_name): +def _assert_pip_check(topsrcdir, pthfile_lines, virtualenv_name, requirements): """Check if the provided pthfile lines have a package incompatibility If there's an incompatibility, raise an exception and allow it to bubble up since it will require user intervention to resolve. + + If requirements aren't provided (such as when Mach is using SYSTEM, but the command + site is using VENV), then skip the "pthfile satisfies requirements" step. """ if os.environ.get( f"MACH_SYSTEM_ASSERTED_COMPATIBLE_WITH_{virtualenv_name.upper()}_SITE", None @@ -1012,21 +1059,10 @@ def _assert_pip_check(topsrcdir, pthfile_lines, virtualenv_name): # Don't re-assert compatibility against the system python within Mach subshells. return - if ( - virtualenv_name == "mach" - and os.environ.get("MACH_USE_SYSTEM_PYTHON") - and not os.environ.get("MOZ_AUTOMATION") - ): - # Since this assertion takes some time, warn users who have explicitly opted - # in. Since we only want this message to be printed once, only do it for the - # first virtualenv that's used (which is always "mach"). - print( - "Since Mach has been requested to use the system Python " - "environment, it will need to verify compatibility before " - "running the current command. This may take a couple seconds.\n" - "Note: you can avoid this delay by unsetting the " - "MACH_USE_SYSTEM_PYTHON environment variable." - ) + print( + 'Running "pip check" to verify compatibility between the system Python and the ' + f'"{virtualenv_name}" site.' + ) with tempfile.TemporaryDirectory() as check_env_path: # Pip detects packages on the "sys.path" that have a ".dist-info" or @@ -1056,6 +1092,19 @@ def _assert_pip_check(topsrcdir, pthfile_lines, virtualenv_name): f.write("\n".join(pthfile_lines)) pip = [check_env.python_path, "-m", "pip"] + if requirements: + packages = _resolve_installed_packages(check_env.python_path) + validation_result = RequirementsValidationResult.from_packages( + packages, requirements + ) + if not validation_result.has_all_packages: + subprocess.check_call(pip + ["list", "-v"], stdout=sys.stderr) + print(validation_result.report(), file=sys.stderr) + raise Exception( + f'The "{virtualenv_name}" site is not compatible with the installed ' + "system Python packages." + ) + check_result = subprocess.run( pip + ["check"], stdout=subprocess.PIPE, diff --git a/python/mach/mach/telemetry_interface.py b/python/mach/mach/telemetry_interface.py index 8b10f11bbed9..1047309ad8b6 100644 --- a/python/mach/mach/telemetry_interface.py +++ b/python/mach/mach/telemetry_interface.py @@ -10,6 +10,8 @@ from pathlib import Path from typing import Union from unittest.mock import Mock +from mach.site import MozSiteMetadata, SitePackagesSource + class NoopTelemetry(object): def __init__(self, failed_glean_import): @@ -20,9 +22,23 @@ class NoopTelemetry(object): def submit(self, is_bootstrap): if self._failed_glean_import and not is_bootstrap: + active_site = MozSiteMetadata.from_runtime() + if active_site.mach_site_packages_source == SitePackagesSource.SYSTEM: + hint = ( + "Mach is looking for glean in the system packages. This can be " + "resolved by installing it there, or by allowing Mach to run " + "without using the system Python packages." + ) + elif active_site.mach_site_packages_source == SitePackagesSource.NONE: + hint = ( + "This is because Mach is currently configured without a source " + "for native Python packages." + ) + else: + hint = "You may need to run |mach bootstrap|." + print( - "Glean could not be found, so telemetry will not be reported. " - "You may need to run |mach bootstrap|.", + f"Glean could not be found, so telemetry will not be reported. {hint}", file=sys.stderr, ) diff --git a/python/mach/mach/test/python.ini b/python/mach/mach/test/python.ini index b60796cff196..de09924b6788 100644 --- a/python/mach/mach/test/python.ini +++ b/python/mach/mach/test/python.ini @@ -12,6 +12,7 @@ skip-if = python == 3 skip-if = python == 3 [test_logger.py] [test_mach.py] +[test_site.py] [test_site_activation.py] [test_site_compatibility.py] # The Windows and Mac workers only use the internal PyPI mirror, diff --git a/python/mach/mach/test/test_site.py b/python/mach/mach/test/test_site.py new file mode 100644 index 000000000000..fbcc78eba3d1 --- /dev/null +++ b/python/mach/mach/test/test_site.py @@ -0,0 +1,54 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from __future__ import absolute_import, unicode_literals + +import os +from unittest import mock +from unittest.mock import Mock + +import pytest as pytest + +from mozunit import main +from mach.site import SitePackagesSource + + +@pytest.mark.parametrize( + "env_native_package_source,env_use_system_python,env_moz_automation,expected", + [ + ("system", False, False, SitePackagesSource.SYSTEM), + ("pip", False, False, SitePackagesSource.VENV), + ("none", False, False, SitePackagesSource.NONE), + (None, False, False, SitePackagesSource.VENV), + (None, False, True, SitePackagesSource.SYSTEM), + (None, True, False, SitePackagesSource.SYSTEM), + (None, True, True, SitePackagesSource.SYSTEM), + ], +) +def test_resolve_package_source( + env_native_package_source, env_use_system_python, env_moz_automation, expected +): + with mock.patch.dict( + os.environ, + { + "MACH_BUILD_PYTHON_NATIVE_PACKAGE_SOURCE": env_native_package_source or "", + "MACH_USE_SYSTEM_PYTHON": "1" if env_use_system_python else "", + "MOZ_AUTOMATION": "1" if env_moz_automation else "", + }, + ): + assert SitePackagesSource.from_environment(Mock(), "build", None) == expected + + +def test_resolve_package_source_always_venv_for_most_sites(): + # Only sites in PIP_NETWORK_INSTALL_RESTRICTED_VIRTUALENVS have to be able to function + # using only vendored packages or system packages. + # All others must have an associated virtualenv. + assert ( + SitePackagesSource.from_environment(Mock(), "python-test", None) + == SitePackagesSource.VENV + ) + + +if __name__ == "__main__": + main()