gecko-dev/tools/moztreedocs/upload.py
Andrew Halberstadt 78e5d41e3e Bug 1664771 - [docs] Remove bogus 'Expires' header when uploading docs, r=firefox-source-docs-reviewers,championshuttler
As per:
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html

This header doesn't actually control when an object gets deleted. Rather, it controls when the
object expires in the browser cache (i.e., once the expiry passes, the browser will reload the object).

Remove this as it's only causing confusion.

Differential Revision: https://phabricator.services.mozilla.com/D90162
2020-09-14 20:06:57 +00:00
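
For context, a sketch of the distinction the message describes, using standard boto3 S3 calls
(the handles and values here are illustrative, not the exact code that was removed):

    # "Expires"/"CacheControl" only affect how long browsers cache the object:
    s3.upload_fileobj(f, bucket, key, ExtraArgs={"CacheControl": "max-age=3600"})

    # Actually deleting objects after some period is done with a bucket lifecycle rule:
    s3.put_bucket_lifecycle_configuration(
        Bucket=bucket,
        LifecycleConfiguration={
            "Rules": [
                {
                    "ID": "expire-old-docs",
                    "Status": "Enabled",
                    "Filter": {"Prefix": ""},
                    "Expiration": {"Days": 30},
                }
            ]
        },
    )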


# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from __future__ import absolute_import, unicode_literals, print_function

import io
import mimetypes
import os
import sys

import botocore
import boto3
import concurrent.futures as futures
import requests
from pprint import pprint

from mozbuild.util import memoize

@memoize
def create_aws_session():
    """
    Create the AWS session that is shared by both the upload and the
    delete steps.
    """
    region = "us-west-2"
    level = os.environ.get("MOZ_SCM_LEVEL", "1")
    bucket = {
        "1": "gecko-docs.mozilla.org-l1",
        "2": "gecko-docs.mozilla.org-l2",
        "3": "gecko-docs.mozilla.org",
    }[level]
    secrets_url = "http://taskcluster/secrets/v1/secret/"
    secrets_url += "project/releng/gecko/build/level-{}/gecko-docs-upload".format(level)

    # Get the credentials from the TC secrets service. Note that these
    # differ per SCM level.
if "TASK_ID" in os.environ:
print("Using AWS credentials from the secrets service")
session = requests.Session()
res = session.get(secrets_url)
res.raise_for_status()
secret = res.json()["secret"]
session = boto3.session.Session(
aws_access_key_id=secret["AWS_ACCESS_KEY_ID"],
aws_secret_access_key=secret["AWS_SECRET_ACCESS_KEY"],
region_name=region,
)
else:
print("Trying to use your AWS credentials..")
session = boto3.session.Session(region_name=region)
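
    # max_pool_connections is raised from boto3's default of 10 to 20, matching
    # the 20-thread pool that s3_upload() uses below.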
    s3 = session.client("s3", config=botocore.client.Config(max_pool_connections=20))

    return s3, bucket

@memoize
def get_s3_keys(s3, bucket):
    kwargs = {"Bucket": bucket}
    all_keys = []
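    # list_objects_v2 returns at most 1000 keys per call, so keep following
    # NextContinuationToken until the listing is exhausted.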
    while True:
        response = s3.list_objects_v2(**kwargs)
        for obj in response["Contents"]:
            all_keys.append(obj["Key"])
        try:
            kwargs["ContinuationToken"] = response["NextContinuationToken"]
        except KeyError:
            break

    return all_keys

def s3_set_redirects(redirects):
    s3, bucket = create_aws_session()

    configuration = {"IndexDocument": {"Suffix": "index.html"}, "RoutingRules": []}

    for path, redirect in redirects.items():
        rule = {
            "Condition": {"KeyPrefixEquals": path},
            "Redirect": {"ReplaceKeyPrefixWith": redirect},
        }
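        # Level 3 (production) docs are served via firefox-source-docs.mozilla.org,
        # so the redirect must name that host explicitly.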
if os.environ.get("MOZ_SCM_LEVEL") == "3":
rule["Redirect"]["HostName"] = "firefox-source-docs.mozilla.org"
configuration["RoutingRules"].append(rule)
s3.put_bucket_website(
Bucket=bucket,
WebsiteConfiguration=configuration,
)
def s3_delete_missing(files, key_prefix=None):
"""Delete files in the S3 bucket.
Delete files on the S3 bucket that doesn't match the files
given as the param. If the key_prefix is not specified, missing
files that has main/ as a prefix will be removed. Otherwise, it
will remove files with the same prefix as key_prefix.
"""
    s3, bucket = create_aws_session()
    files_on_server = get_s3_keys(s3, bucket)
    if key_prefix:
        files_on_server = [
            path for path in files_on_server if path.startswith(key_prefix)
        ]
    else:
        files_on_server = [
            path for path in files_on_server if not path.startswith("main/")
        ]
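    # Compute the keys produced by the current upload; anything else under the
    # selected prefix is considered stale.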
    files = [key_prefix + "/" + path if key_prefix else path for path, f in files]
    files_to_delete = [path for path in files_on_server if path not in files]

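    # DeleteObjects accepts at most 1000 keys per request, so remove the stale
    # keys in batches of that size.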
    query_size = 1000
    while files_to_delete:
        keys_to_remove = [{"Key": key} for key in files_to_delete[:query_size]]
        response = s3.delete_objects(
            Bucket=bucket,
            Delete={
                "Objects": keys_to_remove,
            },  # NOQA
        )
        pprint(response, indent=2)
        files_to_delete = files_to_delete[query_size:]

def s3_upload(files, key_prefix=None):
"""Upload files to an S3 bucket.
``files`` is an iterable of ``(path, BaseFile)`` (typically from a
mozpack Finder).
Keys in the bucket correspond to source filenames. If ``key_prefix`` is
defined, key names will be ``<key_prefix>/<path>``.
"""
    s3, bucket = create_aws_session()

    def upload(f, path, bucket, key, extra_args):
        # Need to flush to avoid buffering/interleaving from multiple threads.
        sys.stdout.write("uploading %s to %s\n" % (path, key))
        sys.stdout.flush()

        s3.upload_fileobj(f, bucket, key, ExtraArgs=extra_args)

    fs = []
    with futures.ThreadPoolExecutor(20) as e:
        for path, f in files:
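            # Guess Content-Type/Content-Encoding from the file name so S3
            # serves e.g. HTML and gzipped assets with the right headers.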
            content_type, content_encoding = mimetypes.guess_type(path)
            extra_args = {}
            if content_type:
                extra_args["ContentType"] = content_type
            if content_encoding:
                extra_args["ContentEncoding"] = content_encoding

            if key_prefix:
                key = "%s/%s" % (key_prefix, path)
            else:
                key = path

            # The file types returned by mozpack behave like file objects. But
            # they don't accept an argument to read(). So we wrap in a BytesIO.
            fs.append(
                e.submit(upload, io.BytesIO(f.read()), path, bucket, key, extra_args)
            )
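
    # Note that ``files`` is iterated a second time inside s3_delete_missing(),
    # so it needs to be a re-iterable sequence (e.g. a list) rather than a
    # one-shot generator.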
    s3_delete_missing(files, key_prefix)

    # result() re-raises any exception raised in the upload threads.
    for f in fs:
        f.result()
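

# A minimal usage sketch (illustrative only; the directory name and redirect
# mapping below are made up, not taken from the real caller):
#
#   from mozpack.files import FileFinder
#
#   finder = FileFinder("docs-out/html")        # built docs directory
#   files = list(finder.find("**"))             # (path, BaseFile) pairs
#   s3_upload(files, key_prefix="main")
#   s3_set_redirects({"old/section/": "new/section/"})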