Bug 1419478 Enable S3 caching for binary diff patch files in partial update tasks r=rail

MozReview-Commit-ID: 23vwFcBkqKd

--HG--
extra : rebase_source : dddafdd3a82d1579073d2abea6e0bfef77a2be8e
Simon Fraser 2017-12-15 14:07:31 -06:00
parent 5ae586e759
commit ff35fd6ca5
5 changed files with 90 additions and 53 deletions

View File

@@ -9,7 +9,7 @@ RUN apt-get update -q && \
     apt-get install -yyq --no-install-recommends \
     python3.5 python3-setuptools python3-cryptography libgetopt-simple-perl \
     bzip2 clamav clamav-freshclam python3-requests python3-sh curl \
-    python3-dev gcc liblzma-dev xz-utils && \
+    python3-dev gcc liblzma-dev xz-utils jq && \
     apt-get clean
 RUN useradd -d /home/worker -s /bin/bash -m worker
 COPY requirements.txt /tmp/

View File

@@ -1,4 +1,5 @@
 mar==2.1.2
 backports.lzma==0.0.8
 datadog==0.17.0
-redo
+redo==1.6
+awscli==1.14.10

View File

@@ -12,8 +12,39 @@ mkdir -p "$ARTIFACTS_DIR"
 curl --location --retry 10 --retry-delay 10 -o /home/worker/task.json \
   "https://queue.taskcluster.net/v1/task/$TASK_ID"
-# enable locale cache
-export MBSDIFF_HOOK="/home/worker/bin/mbsdiff_hook.sh -c /tmp/fs-cache"
+# auth:aws-s3:read-write:tc-gp-private-1d-us-east-1/releng/mbsdiff-cache/
+# -> bucket of tc-gp-private-1d-us-east-1, path of releng/mbsdiff-cache/
+# Trailing slash is important, due to prefix permissions in S3.
+S3_BUCKET_AND_PATH=$(jq -r '.scopes[] | select(contains ("auth:aws-s3"))' /home/worker/task.json | awk -F: '{print $4}')
+
+# Will be empty if there's no scope for AWS S3.
+if [ -n "${S3_BUCKET_AND_PATH}" ]; then
+    # Does this parse as we expect?
+    S3_PATH=${S3_BUCKET_AND_PATH#*/}
+    AWS_BUCKET_NAME=${S3_BUCKET_AND_PATH%/${S3_PATH}*}
+    test "${S3_PATH}"
+    test "${AWS_BUCKET_NAME}"
+
+    set +x  # Don't echo these.
+    secret_url="taskcluster/auth/v1/aws/s3/read-write/${AWS_BUCKET_NAME}/${S3_PATH}"
+    AUTH=$(curl "${secret_url}")
+    AWS_ACCESS_KEY_ID=$(echo "${AUTH}" | jq -r '.credentials.accessKeyId')
+    AWS_SECRET_ACCESS_KEY=$(echo "${AUTH}" | jq -r '.credentials.secretAccessKey')
+    AWS_SESSION_TOKEN=$(echo "${AUTH}" | jq -r '.credentials.sessionToken')
+    export AWS_ACCESS_KEY_ID
+    export AWS_SECRET_ACCESS_KEY
+    export AWS_SESSION_TOKEN
+    AUTH=
+
+    if [ -n "$AWS_ACCESS_KEY_ID" ] && [ -n "$AWS_SECRET_ACCESS_KEY" ]; then
+        # Pass the full bucket/path prefix, as the script just appends local files.
+        export MBSDIFF_HOOK="/home/worker/bin/mbsdiff_hook.sh -S ${S3_BUCKET_AND_PATH}"
+    fi
+    set -x
+else
+    # enable local cache
+    export MBSDIFF_HOOK="/home/worker/bin/mbsdiff_hook.sh -c /tmp/fs-cache"
+fi
 
 if [ ! -z "$FILENAME_TEMPLATE" ]; then
     EXTRA_PARAMS="--filename-template $FILENAME_TEMPLATE $EXTRA_PARAMS"
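
For reference, a minimal standalone sketch (not part of the commit) of how the parameter expansions above split the scope suffix into bucket and key prefix, using the example value from the comments:

#!/bin/sh
# Example value, as the jq/awk pipeline above would extract it from task.json.
S3_BUCKET_AND_PATH="tc-gp-private-1d-us-east-1/releng/mbsdiff-cache/"
# Drop everything up to and including the first "/" to get the key prefix.
S3_PATH=${S3_BUCKET_AND_PATH#*/}                      # -> releng/mbsdiff-cache/
# Strip "/<prefix>" from the end to recover the bare bucket name.
AWS_BUCKET_NAME=${S3_BUCKET_AND_PATH%/${S3_PATH}*}    # -> tc-gp-private-1d-us-east-1
echo "bucket=${AWS_BUCKET_NAME} prefix=${S3_PATH}"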

View File

@@ -9,15 +9,15 @@
 #
 HOOK=
-SERVER_URL=
+AWS_BUCKET_NAME=
 LOCAL_CACHE_DIR=
 
 getsha512(){
-    echo "$(openssl sha512 "${1}" | awk '{print $2}')"
+    openssl sha512 "${1}" | awk '{print $2}'
 }
 
 print_usage(){
-    echo "$(basename $0) -A SERVER-URL [-c LOCAL-CACHE-DIR-PATH] [-g] [-u] PATH-FROM-URL PATH-TO-URL PATH-PATCH"
+    echo "$(basename "$0") [-S S3-BUCKET-NAME] [-c LOCAL-CACHE-DIR-PATH] [-g] [-u] PATH-FROM-URL PATH-TO-URL PATH-PATCH"
     echo "Script that saves/retrieves from cache presumptive patches as args"
     echo ""
     echo "-A SERVER-URL - host where to send the files"
@@ -31,74 +31,76 @@ print_usage(){
 }
 
 upload_patch(){
-    sha_from=`getsha512 "$1"`
-    sha_to=`getsha512 "$2"`
+    sha_from=$(getsha512 "$1")
+    sha_to=$(getsha512 "$2")
     patch_path="$3"
+    patch_filename="$(basename "$3")"
 
     # save to local cache first
     if [ -n "$LOCAL_CACHE_DIR" ]; then
         local_cmd="mkdir -p "$LOCAL_CACHE_DIR/$sha_from""
-        if `$local_cmd` >&2; then
-            cp -avf "$patch_path" "$LOCAL_CACHE_DIR/$sha_from/$sha_to"
-            echo "$patch_path saved on local cache!"
+        if $local_cmd >&2; then
+            cp -avf "${patch_path}" "$LOCAL_CACHE_DIR/$sha_from/$sha_to"
+            echo "${patch_path} saved on local cache."
         fi
     fi
-    # The remote cache implementation is not used. The code is for usage
-    # reference only.
-    return 0
-    # send it over to funsize
-    cmd="curl -sSw %{http_code} -o /dev/null -X POST $SERVER_URL -F sha_from="$sha_from" -F sha_to="$sha_to" -F patch_file="@$patch_path""
-    ret_code=`$cmd`
-    if [ $ret_code -eq 200 ]; then
-        echo "$patch_path Successful uploaded to funsize!"
-        return 0
+
+    if [ -n "${AWS_BUCKET_NAME}" ]; then
+        BUCKET_PATH="s3://${AWS_BUCKET_NAME}${sha_from}/${sha_to}/${patch_filename}"
+        if aws s3 cp "${patch_path}" "${BUCKET_PATH}"; then
+            echo "${patch_path} saved on s3://${AWS_BUCKET_NAME}"
+            return 0
+        fi
+        echo "${patch_path} failed to be uploaded to s3://${AWS_BUCKET_NAME}"
+        return 1
     fi
-    echo "$patch_path Failed to be uploaded to funsize!"
-    return 1
+    return 0
 }
 
 get_patch(){
-    sha_from=`getsha512 "$1"`
-    sha_to=`getsha512 "$2"`
+    # $1 and $2 are the /path/to/filename
+    sha_from=$(getsha512 "$1")
+    sha_to=$(getsha512 "$2")
     destination_file="$3"
-    tmp_file="$destination_file.tmp"
+    s3_filename="$(basename "$3")"
 
-    # try to retrieve from local cache first
-    if [ -r "$LOCAL_CACHE_DIR/$sha_from/$sha_to" ]; then
-        cp -avf "$LOCAL_CACHE_DIR/$sha_from/$sha_to" "$destination_file"
-        echo "Successful retrieved $destination_file from local cache!"
-        return 0
-    else
-        echo "File is not in the locale cache"
-        return 1
+    # Try to retrieve from local cache first.
+    if [ -n "$LOCAL_CACHE_DIR" ]; then
+        if [ -r "$LOCAL_CACHE_DIR/$sha_from/$sha_to" ]; then
+            cp -avf "$LOCAL_CACHE_DIR/$sha_from/$sha_to" "$destination_file"
+            echo "Successfully retrieved ${destination_file} from local cache."
+            return 0
+        fi
     fi
-    # The remote cache implementation is not used. The code is for usage
-    # reference only.
-    # if unsuccessful, try to retrieve from funsize
-    cmd="curl -LsSGw %{http_code} $SERVER_URL/$sha_from/$sha_to -o $tmp_file"
-    ret_code=`$cmd`
-    if [ $ret_code -eq 200 ]; then
-        mv "$tmp_file" "$destination_file"
-        echo "Successful retrieved $destination_file from funsize!"
-        return 0
+    # If not in the local cache, we might find it remotely.
+
+    if [ -n "${AWS_BUCKET_NAME}" ]; then
+        BUCKET_PATH="s3://${AWS_BUCKET_NAME}${sha_from}/${sha_to}/${s3_filename}"
+        if aws s3 ls "${BUCKET_PATH}"; then
+            if aws s3 cp "${BUCKET_PATH}" "${destination_file}"; then
+                echo "Successfully retrieved ${destination_file} from s3://${AWS_BUCKET_NAME}"
+                return 0
+            else
+                echo "Failed to retrieve ${destination_file} from s3://${AWS_BUCKET_NAME}"
+                return 1
+            fi
+            # Not found, fall through to default error
+        fi
     fi
-    rm -f "$tmp_file"
-    echo "Failed to retrieve $destination_file from funsize!"
     return 1
 }
 
 OPTIND=1
-while getopts ":A:c:gu" option; do
+while getopts ":S:c:gu" option; do
     case $option in
-        A)
-            SERVER_URL="$OPTARG"
+        S)
+            # This will probably be bucketname/path/prefix but we can use it either way
+            AWS_BUCKET_NAME="$OPTARG"
+            # Ensure trailing slash is there.
+            if [[ ! $AWS_BUCKET_NAME =~ .*/$ ]]; then
+                AWS_BUCKET_NAME="${AWS_BUCKET_NAME}/"
+            fi
             ;;
         c)
             LOCAL_CACHE_DIR="$OPTARG"
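
For context, a hypothetical manual invocation of the hook (mbsdiff normally supplies these arguments itself via $MBSDIFF_HOOK; the -g and -u flags presumably select get_patch and upload_patch per the usage string above, and the file names are illustrative). Patches land under s3://<bucket>/<prefix>/<sha512 of from>/<sha512 of to>/<patch name>:

# Try to fetch a cached patch for this from/to pair into patch.bin:
./mbsdiff_hook.sh -S tc-gp-private-1d-us-east-1/releng/mbsdiff-cache/ -g from.bin to.bin patch.bin
# Upload a freshly generated patch to the same prefix:
./mbsdiff_hook.sh -S tc-gp-private-1d-us-east-1/releng/mbsdiff-cache/ -u from.bin to.bin patch.bin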

View File

@@ -149,7 +149,10 @@ def make_task_description(config, jobs):
                 dep_job.task["metadata"]["description"]),
             'worker-type': 'aws-provisioner-v1/gecko-%s-b-linux' % level,
             'dependencies': dependencies,
-            'scopes': ['secrets:get:project/releng/gecko/build/level-%s/datadog-api-key' % level],
+            'scopes': [
+                'secrets:get:project/releng/gecko/build/level-%s/datadog-api-key' % level,
+                'auth:aws-s3:read-write:tc-gp-private-1d-us-east-1/releng/mbsdiff-cache/'
+            ],
             'attributes': attributes,
             'run-on-projects': dep_job.attributes.get('run_on_projects'),
             'treeherder': treeherder,
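
To close the loop with runme.sh above: a task produced by this transform carries the new scope in its task definition, which the worker-side jq lookup then finds. A sketch, assuming a level-3 task with an illustrative scopes list:

# task.json (excerpt, illustrative):
# {
#   "scopes": [
#     "secrets:get:project/releng/gecko/build/level-3/datadog-api-key",
#     "auth:aws-s3:read-write:tc-gp-private-1d-us-east-1/releng/mbsdiff-cache/"
#   ]
# }
jq -r '.scopes[] | select(contains("auth:aws-s3"))' task.json | awk -F: '{print $4}'
# -> tc-gp-private-1d-us-east-1/releng/mbsdiff-cache/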