Bug 527670: Async download/cache UpdateVerify files and improve logs r=aki
Changed wget output log granularity from dot:mega to dot:giga (wget is only used in case the python library didn't cover all use cases).
Added python methods to async cache/download all required files for update verify.
Modified Dockerfile requirements to include the aiohttp python lib.

Differential Revision: https://phabricator.services.mozilla.com/D129429
This commit is contained in:
parent 20bdf607b8, commit 9963047f51
@@ -12,6 +12,7 @@ apt_packages+=('curl')
apt_packages+=('locales')
apt_packages+=('python2')
apt_packages+=('python3-pip')
apt_packages+=('python3-aiohttp')
apt_packages+=('shellcheck')
apt_packages+=('sudo')
@@ -6,15 +6,10 @@ pushd `dirname $0` &>/dev/null
cache_dir="$(pwd)/cache"
popd &>/dev/null

# to clear the entire cache, recommended at beginning and end of scripts that call it
# Deletes all files in the cache directory
# We don't support folders or .dot(hidden) files
clear_cache () {
    rm -rf "${cache_dir}"
}

# creates an empty cache, should be called once before downloading anything
function create_cache () {
    mkdir "${cache_dir}"
    touch "${cache_dir}/urls.list"
    rm -rf "${cache_dir}/*"
}

# download method - you pass a filename to save the file under, and the url to call
@@ -29,7 +24,7 @@ cached_download () {
    else
        echo "Downloading '${url}' and placing in cache..."
        rm -f "${output_file}"
        $retry wget -O "${output_file}" --progress=dot:mega --server-response "${url}" 2>&1
        $retry wget -O "${output_file}" --progress=dot:giga --server-response "${url}" 2>&1
        local exit_code=$?
        if [ "${exit_code}" == 0 ]; then
            echo "${url}" >> "${cache_dir}/urls.list"
@@ -7,8 +7,9 @@
. ../common/download_builds.sh
. ../common/check_updates.sh

clear_cache
create_cache
# Cache being handled by new async_download.py
# clear_cache
# create_cache

ftp_server_to="http://stage.mozilla.org/pub/mozilla.org"
ftp_server_from="http://stage.mozilla.org/pub/mozilla.org"
tools/update-verify/scripts/async_download.py (new file, 336 lines)
@@ -0,0 +1,336 @@
#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import aiohttp
import asyncio
import logging
import os
from os import path
import glob
import sys
import xml.etree.ElementTree as ET

logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(message)s")
log = logging.getLogger(__name__)

UV_CACHE_PATH = os.getenv(
    "UV_CACHE_PATH", os.path.join(path.dirname(__file__), "../release/updates/cache/")
)
UV_PARALLEL_DOWNLOADS = os.getenv("UV_PARALLEL_DOWNLOADS", 20)

FTP_SERVER_TO = os.getenv("ftp_server_to", "http://stage.mozilla.org/pub/mozilla.org")
FTP_SERVER_FROM = os.getenv(
    "ftp_server_from", "http://stage.mozilla.org/pub/mozilla.org"
)
AUS_SERVER = os.getenv("aus_server", "https://aus5.mozilla.org")

def create_cache():
    if not os.path.isdir(UV_CACHE_PATH):
        os.mkdir(UV_CACHE_PATH)


def remove_cache():
    """
    Removes all files in the cache folder
    We don't support folders or .dot(hidden) files
    By not deleting the cache directory, it allows us to use Docker tmpfs mounts,
    which are the only workaround to poor mount r/w performance on MacOS
    Bug Reference:
    https://forums.docker.com/t/file-access-in-mounted-volumes-extremely-slow-cpu-bound/8076/288
    """
    files = glob.glob(f"{UV_CACHE_PATH}/*")
    for f in files:
        os.remove(f)


def _cachepath(i, ext):
    # Helper function: given an index, return a cache file path
    return path.join(UV_CACHE_PATH, f"obj_{i:0>5}.{ext}")

async def fetch_url(url, path, connector):
    """
    Fetch/download a file to a specific path

    Parameters
    ----------
    url : str
        URL to be fetched

    path : str
        Path to save binary

    Returns
    -------
    tuple
        Err, Headers
    """

    def _result(response):
        data = {
            "headers": dict(response.headers),
            "status": response.status,
            "reason": response.reason,
            "_request_info": str(response._request_info),
            "url": url,
            "path": path,
        }
        return data

    async with aiohttp.ClientSession(
        connector=connector, connector_owner=False
    ) as session:
        log.info(f"Retrieving {url}")
        async with session.get(url) as response:
            if response.status >= 299:
                log.warn(f"Failed to download {url} with status {response.status}")
                return _result(response), None
            with open(path, "wb") as fd:
                while True:
                    chunk = await response.content.read()
                    if not chunk:
                        break
                    fd.write(chunk)
            result = _result(response)
            log.info(f'Finished downloading {url}\n{result["headers"]}')
            return None, result

async def download_multi(targets, sourceFunc):
    """
    Download list of targets

    Parameters
    ----------
    targets : list
        List of urls to download

    sourceFunc : str
        Source function name (for filename)

    Returns
    -------
    tuple
        List of responses (Headers)
    """

    targets = set(targets)
    amount = len(targets)

    connector = aiohttp.TCPConnector(
        limit=UV_PARALLEL_DOWNLOADS,  # Simultaneous connections, per host
        ttl_dns_cache=600,  # Cache DNS for 10 mins
    )

    log.info(f"\nDownloading {amount} files ({UV_PARALLEL_DOWNLOADS} async limit)")

    # Transform targets into {url, path} objects
    payloads = [
        {"url": url, "path": _cachepath(i, sourceFunc)}
        for (i, url) in enumerate(targets)
    ]

    downloads = []

    fetches = [fetch_url(t["url"], t["path"], connector) for t in payloads]

    downloads.extend(await asyncio.gather(*fetches))
    connector.close()

    results = []
    # Remove file if download failed
    for fetch in downloads:
        if fetch[0]:
            try:
                os.unlink(fetch[0]["path"])
            except FileNotFoundError:
                continue
        results.append(fetch[1])

    return results

async def download_builds(verifyConfig):
    """
    Given UpdateVerifyConfig, download and cache all necessary updater files
    Include "to" and "from"/"updater_package"

    Parameters
    ----------
    verifyConfig : UpdateVerifyConfig
        Chunked config

    Returns
    -------
    list : List of file paths and urls to each updater file
    """

    updaterUrls = set()
    for release in verifyConfig.releases:
        ftpServerFrom = release["ftp_server_from"]
        ftpServerTo = release["ftp_server_to"]

        for locale in release["locales"]:
            toUri = verifyConfig.to
            if toUri is not None and ftpServerTo is not None:
                toUri = toUri.replace("%locale%", locale)
                updaterUrls.add(f"{ftpServerTo}{toUri}")

            for reference in ("updater_package", "from"):
                uri = release.get(reference, None)
                if uri is None:
                    continue
                uri = uri.replace("%locale%", locale)
                updaterUrls.add(f"{ftpServerFrom}{uri}")

    log.info(f"About to download {len(updaterUrls)} updater packages")

    updaterResults = await download_multi(list(updaterUrls), "updater.async.cache")
    return updaterResults

def get_mar_urls_from_update(path):
    """
    Given an update.xml file, return MAR URLs

    If update.xml doesn't have URLs, returns empty list

    Parameters
    ----------
    path : str
        Path to update.xml file

    Returns
    -------
    list : List of URLs
    """

    result = []
    root = ET.parse(path).getroot()
    for patch in root.findall("update/patch"):
        url = patch.get("URL")
        if url:
            result.append(url)
    return result

async def download_mars(updatePaths):
    """
    Given list of update.xml paths, download MARs for each

    Parameters
    ----------
    updatePaths : list
        List of paths to update.xml files
    """

    patchUrls = set()
    for updatePath in updatePaths:
        for url in get_mar_urls_from_update(updatePath):
            patchUrls.add(url)

    log.info(f"About to download {len(patchUrls)} MAR packages")
    marResults = await download_multi(list(patchUrls), "mar.async.cache")
    return marResults

async def download_update_xml(verifyConfig):
    """
    Given UpdateVerifyConfig, download and cache all necessary update.xml files

    Parameters
    ----------
    verifyConfig : UpdateVerifyConfig
        Chunked config

    Returns
    -------
    list : List of file paths and urls to each update.xml file
    """

    xmlUrls = set()
    product = verifyConfig.product
    urlTemplate = (
        "{server}/update/3/{product}/{release}/{build}/{platform}/"
        "{locale}/{channel}/default/default/default/update.xml?force=1"
    )

    for release in verifyConfig.releases:
        for locale in release["locales"]:
            xmlUrls.add(
                urlTemplate.format(
                    server=AUS_SERVER,
                    product=product,
                    release=release["release"],
                    build=release["build_id"],
                    platform=release["platform"],
                    locale=locale,
                    channel=verifyConfig.channel,
                )
            )

    log.info(f"About to download {len(xmlUrls)} update.xml files")
    xmlResults = await download_multi(list(xmlUrls), "update.xml.async.cache")
    return xmlResults

async def _download_from_config(verifyConfig):
    """
    Given an UpdateVerifyConfig object, download all necessary files to cache

    Parameters
    ----------
    verifyConfig : UpdateVerifyConfig
        The config - already chunked
    """
    remove_cache()
    create_cache()

    downloadList = []
    ##################
    # Download files #
    ##################
    xmlFiles = await download_update_xml(verifyConfig)
    downloadList.extend(xmlFiles)
    downloadList += await download_mars(x["path"] for x in xmlFiles)
    downloadList += await download_builds(verifyConfig)

    #####################
    # Create cache.list #
    #####################
    cacheLinks = []

    # Rename files and add to cache_links
    for download in downloadList:
        cacheLinks.append(download["url"])
        fileIndex = len(cacheLinks)
        os.rename(download["path"], _cachepath(fileIndex, "cache"))

    cacheIndexPath = path.join(UV_CACHE_PATH, "urls.list")
    with open(cacheIndexPath, "w") as cache:
        cache.writelines(f"{l}\n" for l in cacheLinks)

    # Log cache
    log.info("Cache index urls.list contents:")
    with open(cacheIndexPath, "r") as cache:
        for ln, url in enumerate(cache.readlines()):
            line = url.replace("\n", "")
            log.info(f"Line {ln+1}: {line}")

    return None

def download_from_config(verifyConfig):
    """
    Given an UpdateVerifyConfig object, download all necessary files to cache
    (sync function that calls the async one)

    Parameters
    ----------
    verifyConfig : UpdateVerifyConfig
        The config - already chunked
    """
    return asyncio.run(_download_from_config(verifyConfig))
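For orientation, a minimal usage sketch of the new module follows. It is not part of this commit: the URL is a placeholder, and real callers build AUS update.xml and MAR URLs from an UpdateVerifyConfig via download_from_config(), as the wrapper hunk below shows.

# Sketch only: drive async_download's public helpers directly.
import asyncio

from async_download import create_cache, download_multi, remove_cache

remove_cache()  # empty the cache dir (files only) without deleting the dir itself
create_cache()  # create the cache dir if it does not exist yet

# Placeholder URL; each target is cached as obj_NNNNN.<ext> under UV_CACHE_PATH.
urls = ["https://example.com/update.xml"]

results = asyncio.run(download_multi(urls, "update.xml.async.cache"))
for r in results:
    if r:  # a failed download leaves a None entry in the results list
        print(r["status"], r["url"], "->", r["path"])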
@@ -18,6 +18,8 @@ log = logging.getLogger(__name__)
from mozrelease.update_verify import UpdateVerifyConfig
from util.commands import run_cmd

from async_download import download_from_config

UPDATE_VERIFY_COMMAND = ["bash", "verify.sh", "-c"]
UPDATE_VERIFY_DIR = path.join(path.dirname(__file__), "../release/updates")

@@ -55,6 +57,10 @@ if __name__ == "__main__":
    myVerifyConfig.write(fh)
    fh.close()
    run_cmd(["cat", configFile])

    # Before verifying, we want to download and cache all required files
    download_from_config(myVerifyConfig)

    run_cmd(
        UPDATE_VERIFY_COMMAND + [configFile],
        cwd=UPDATE_VERIFY_DIR,