Better moving system

Alessandro Autiero
2024-12-10 17:18:10 +01:00
parent 0cfa4af236
commit fdb1d694d9
3 changed files with 67 additions and 99 deletions

View File

@@ -3,6 +3,6 @@
Builds are stored on a Cloudflare R2 instance at `https://builds.rebootfn.org/versions.json`.
If you want to move them to another S3-compatible object storage service, run:
```
move.ps1
python move.py
```
and provide the required parameters.
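
For reference, a complete invocation might look like the sketch below; the flags are the ones defined by `move.py`'s argument parser, the placeholder values are illustrative, and `--versions-file` may be omitted when the URL list is in the default `versions.txt`:
```
python move.py --bucket <bucket-name> --concurrency 8 --access-key <ACCESS_KEY_ID> --secret-key <SECRET_ACCESS_KEY> --endpoint-url https://<account_id>.r2.cloudflarestorage.com --versions-file versions.txt
```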

View File

@@ -1,98 +0,0 @@
param(
    [Parameter(Mandatory=$true)]
    [string]$UrlListPath, # Path to a text file with one URL per line
    [Parameter(Mandatory=$true)]
    [string]$BucketName, # Name of the R2 bucket
    [Parameter(Mandatory=$true)]
    [string]$AccessKey, # Your R2 access key
    [Parameter(Mandatory=$true)]
    [string]$SecretKey, # Your R2 secret key
    [Parameter(Mandatory=$true)]
    [string]$EndPointURL, # Your R2 endpoint URL, e.g. https://<account_id>.r2.cloudflarestorage.com
    [Parameter(Mandatory=$false)]
    [int]$MaxConcurrentConnections = 16, # Number of concurrent connections for each file download
    [Parameter(Mandatory=$false)]
    [int]$SplitCount = 16, # Number of segments to split the download into
    [Parameter(Mandatory=$false)]
    [string]$AwsRegion = "auto" # Region; often "auto" works for R2, but can be set if needed
)
# Set AWS environment variables for this session
$Env:AWS_ACCESS_KEY_ID = $AccessKey
$Env:AWS_SECRET_ACCESS_KEY = $SecretKey
$Env:AWS_REGION = $AwsRegion # If required, or leave as "auto"
# Read all URLs from the file, skipping blank lines
$Urls = Get-Content $UrlListPath | Where-Object { $_ -and $_.Trim() -ne "" }
# Ensure aria2 is available
if (-not (Get-Command aria2c -ErrorAction SilentlyContinue)) {
    Write-Error "aria2c not found in PATH. Please install aria2."
    exit 1
}
# Ensure aws CLI is available
if (-not (Get-Command aws -ErrorAction SilentlyContinue)) {
    Write-Error "aws CLI not found in PATH. Please install AWS CLI."
    exit 1
}
function Process-Url {
    param(
        [string]$Url,
        [string]$BucketName,
        [string]$EndPointURL,
        [int]$MaxConcurrentConnections,
        [int]$SplitCount
    )
    # Extract the filename from the URL
    $FileName = Split-Path -Leaf $Url
    try {
        Write-Host "Downloading: $Url"
        # Use aria2c to download with multiple connections
        & aria2c `
            --max-connection-per-server=$MaxConcurrentConnections `
            --split=$SplitCount `
            --out=$FileName `
            --check-certificate=false `
            --header="Cookie: _c_t_c=1" `
            $Url
        if (!(Test-Path $FileName)) {
            Write-Host "Failed to download $Url"
            return
        }
        Write-Host "Uploading $FileName to R2 bucket: $BucketName"
        & aws s3 cp $FileName "s3://$BucketName/$FileName" --endpoint-url $EndPointURL
        if ($LASTEXITCODE -ne 0) {
            Write-Host "Failed to upload $FileName to R2"
            return
        }
        Write-Host "Upload successful. Deleting local file: $FileName"
        Remove-Item $FileName -Force
        Write-Host "Completed processing of $FileName."
    } catch {
        Write-Host "Error processing $Url"
        Write-Host $_
    }
}
# Process each URL sequentially here. If you'd like to run multiple URLs in parallel,
# you could replace the foreach loop with a ForEach-Object -Parallel block.
foreach ($Url in $Urls) {
    Process-Url -Url $Url -BucketName $BucketName -EndPointURL $EndPointURL -MaxConcurrentConnections $MaxConcurrentConnections -SplitCount $SplitCount
}
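# The foreach loop above runs one URL at a time. As a hedged sketch (not part of the
# original script), the ForEach-Object -Parallel alternative mentioned above could look
# roughly like this on PowerShell 7+; functions are not visible inside -Parallel
# runspaces, so Process-Url has to be re-created in each one via the $using: scope:
#
#   $ProcessUrlDef = ${function:Process-Url}.ToString()
#   $Urls | ForEach-Object -Parallel {
#       ${function:Process-Url} = $using:ProcessUrlDef
#       Process-Url -Url $_ -BucketName $using:BucketName -EndPointURL $using:EndPointURL `
#           -MaxConcurrentConnections $using:MaxConcurrentConnections -SplitCount $using:SplitCount
#   } -ThrottleLimit 4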

66 archive/move.py Normal file
View File

@@ -0,0 +1,66 @@
import argparse
import os
import requests
import boto3
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import urlparse
def upload_url_to_s3(s3_client, bucket_name, url, object_key):
    # Stream the remote file and upload it to the bucket without writing it to disk.
    # The cookie header and disabled TLS verification mirror the old move.ps1 settings.
    response = requests.get(url, stream=True, verify=False, headers={"Cookie": "_c_t_c=1"})
    response.raise_for_status()
    s3_client.upload_fileobj(response.raw, bucket_name, object_key)
    return url, object_key


def derive_key_from_url(url, prefix=None):
    # Use the last path segment of the URL as the object key, optionally under a prefix
    parsed = urlparse(url)
    filename = os.path.basename(parsed.path)
    if prefix:
        return f"{prefix}/{filename}"
    else:
        return filename


def main():
    parser = argparse.ArgumentParser(description="Upload multiple URLs from versions.txt to an S3 bucket concurrently.")
    parser.add_argument('--bucket', required=True, help="Name of the S3 bucket.")
    parser.add_argument('--concurrency', required=True, type=int, help="Number of concurrent uploads.")
    parser.add_argument('--versions-file', default='versions.txt', help="File containing one URL per line.")
    parser.add_argument('--access-key', required=True, help="AWS Access Key ID.")
    parser.add_argument('--secret-key', required=True, help="AWS Secret Access Key.")
    parser.add_argument('--endpoint-url', required=True, help="Custom endpoint URL for S3 or S3-compatible storage.")
    args = parser.parse_args()

    bucket_name = args.bucket
    concurrency = args.concurrency
    versions_file = args.versions_file
    access_key = args.access_key
    secret_key = args.secret_key
    endpoint_url = args.endpoint_url

    # Read one URL per line, skipping blank lines
    with open(versions_file, 'r') as f:
        urls = [line.strip() for line in f if line.strip()]
    print(f"Uploading {len(urls)} files...")

    # Build the boto3 client from the credentials and endpoint given on the command line
    s3_params = {}
    if access_key and secret_key:
        s3_params['aws_access_key_id'] = access_key
        s3_params['aws_secret_access_key'] = secret_key
    if endpoint_url:
        s3_params['endpoint_url'] = endpoint_url
    s3 = boto3.client('s3', **s3_params)

    # Upload the URLs concurrently and report each result as it completes
    futures = []
    with ThreadPoolExecutor(max_workers=concurrency) as executor:
        for url in urls:
            object_key = derive_key_from_url(url)
            futures.append(executor.submit(upload_url_to_s3, s3, bucket_name, url, object_key))
        for future in as_completed(futures):
            try:
                uploaded_url, uploaded_key = future.result()
                print(f"Uploaded: {uploaded_url} -> s3://{bucket_name}/{uploaded_key}")
            except Exception as e:
                print(f"Error uploading: {e}")


if __name__ == "__main__":
    main()