diff --git a/archive/README.md b/archive/README.md
index 1f36bf4..8991a13 100644
--- a/archive/README.md
+++ b/archive/README.md
@@ -3,6 +3,6 @@
 Builds are stored on a Cloudflare R2 instance at `https://builds.rebootfn.org/versions.json`.
 If you want to move them to another AWS-compatible object storage, run:
 ```
-move.ps1
+python move.py
 ```
 and provide the required parameters.
\ No newline at end of file
diff --git a/archive/move.ps1 b/archive/move.ps1
deleted file mode 100644
index aee0fe5..0000000
--- a/archive/move.ps1
+++ /dev/null
@@ -1,98 +0,0 @@
-param(
-    [Parameter(Mandatory=$true)]
-    [string]$UrlListPath,                  # Path to a text file with one URL per line
-
-    [Parameter(Mandatory=$true)]
-    [string]$BucketName,                   # Name of the R2 bucket
-
-    [Parameter(Mandatory=$true)]
-    [string]$AccessKey,                    # Your R2 access key
-
-    [Parameter(Mandatory=$true)]
-    [string]$SecretKey,                    # Your R2 secret key
-
-    [Parameter(Mandatory=$true)]
-    [string]$EndPointURL,                  # Your R2 endpoint URL, e.g. https://<accountid>.r2.cloudflarestorage.com
-
-    [Parameter(Mandatory=$false)]
-    [int]$MaxConcurrentConnections = 16,   # Number of concurrent connections for each file download
-
-    [Parameter(Mandatory=$false)]
-    [int]$SplitCount = 16,                 # Number of segments to split the download into
-
-    [Parameter(Mandatory=$false)]
-    [string]$AwsRegion = "auto"            # Region; often "auto" works for R2, but can be set if needed
-)
-
-# Set AWS environment variables for this session
-$Env:AWS_ACCESS_KEY_ID = $AccessKey
-$Env:AWS_SECRET_ACCESS_KEY = $SecretKey
-$Env:AWS_REGION = $AwsRegion  # If required, or leave as "auto"
-
-# Read all URLs from file
-$Urls = Get-Content $UrlListPath | Where-Object { $_ -and $_.Trim() -ne "" }
-
-# Ensure aria2 is available
-if (-not (Get-Command aria2c -ErrorAction SilentlyContinue)) {
-    Write-Error "aria2c not found in PATH. Please install aria2."
-    exit 1
-}
-
-# Ensure aws CLI is available
-if (-not (Get-Command aws -ErrorAction SilentlyContinue)) {
-    Write-Error "aws CLI not found in PATH. Please install AWS CLI."
-    exit 1
-}
-
-function Process-Url {
-    param(
-        [string]$Url,
-        [string]$BucketName,
-        [string]$EndPointURL,
-        [int]$MaxConcurrentConnections,
-        [int]$SplitCount
-    )
-
-    # Extract the filename from the URL
-    $FileName = Split-Path -Leaf $Url
-
-    try {
-        Write-Host "Downloading: $Url"
-
-        # Use aria2c to download with multiple connections
-        & aria2c `
-            --max-connection-per-server=$MaxConcurrentConnections `
-            --split=$SplitCount `
-            --out=$FileName `
-            --check-certificate=false `
-            --header="Cookie: _c_t_c=1" `
-            $Url
-
-        if (!(Test-Path $FileName)) {
-            Write-Host "Failed to download $Url"
-            return
-        }
-
-        Write-Host "Uploading $FileName to R2 bucket: $BucketName"
-        & aws s3 cp $FileName "s3://$BucketName/$FileName" --endpoint-url $EndPointURL
-        if ($LASTEXITCODE -ne 0) {
-            Write-Host "Failed to upload $FileName to R2"
-            return
-        }
-
-        Write-Host "Upload successful. Deleting local file: $FileName"
-        Remove-Item $FileName -Force
-
-        Write-Host "Completed processing of $FileName."
-
-    } catch {
-        Write-Host "Error processing $Url"
-        Write-Host $_
-    }
-}
-
-# Process each URL sequentially here. If you'd like to run multiple URLs in parallel,
-# you could replace the foreach loop with a ForEach-Object -Parallel block.
-foreach ($Url in $Urls) {
-    Process-Url -Url $Url -BucketName $BucketName -EndPointURL $EndPointURL -MaxConcurrentConnections $MaxConcurrentConnections -SplitCount $SplitCount
-}
diff --git a/archive/move.py b/archive/move.py
new file mode 100644
index 0000000..27d5cb3
--- /dev/null
+++ b/archive/move.py
@@ -0,0 +1,66 @@
+import argparse
+import os
+import requests
+import boto3
+
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from urllib.parse import urlparse
+
+def upload_url_to_s3(s3_client, bucket_name, url, object_key):
+    response = requests.get(url, stream=True, verify=False, headers={"Cookie": "_c_t_c=1"})
+    response.raise_for_status()
+    s3_client.upload_fileobj(response.raw, bucket_name, object_key)
+    return url, object_key
+
+def derive_key_from_url(url, prefix=None):
+    parsed = urlparse(url)
+    filename = os.path.basename(parsed.path)
+    if prefix:
+        return f"{prefix}/{filename}"
+    else:
+        return filename
+
+def main():
+    parser = argparse.ArgumentParser(description="Upload multiple URLs from versions.txt to an S3 bucket concurrently.")
+    parser.add_argument('--bucket', required=True, help="Name of the S3 bucket.")
+    parser.add_argument('--concurrency', required=True, type=int, help="Number of concurrent uploads.")
+    parser.add_argument('--versions-file', default='versions.txt', help="File containing one URL per line.")
+    parser.add_argument('--access-key', required=True, help="AWS Access Key ID.")
+    parser.add_argument('--secret-key', required=True, help="AWS Secret Access Key.")
+    parser.add_argument('--endpoint-url', required=True, help="Custom endpoint URL for S3 or S3-compatible storage.")
+    args = parser.parse_args()
+
+    bucket_name = args.bucket
+    concurrency = args.concurrency
+    versions_file = args.versions_file
+    access_key = args.access_key
+    secret_key = args.secret_key
+    endpoint_url = args.endpoint_url
+
+    with open(versions_file, 'r') as f:
+        urls = [line.strip() for line in f if line.strip()]
+
+    print(f"Uploading {len(urls)} files...")
+    s3_params = {}
+    if access_key and secret_key:
+        s3_params['aws_access_key_id'] = access_key
+        s3_params['aws_secret_access_key'] = secret_key
+    if endpoint_url:
+        s3_params['endpoint_url'] = endpoint_url
+
+    s3 = boto3.client('s3', **s3_params)
+
+    futures = []
+    with ThreadPoolExecutor(max_workers=concurrency) as executor:
+        for url in urls:
+            object_key = derive_key_from_url(url)
+            futures.append(executor.submit(upload_url_to_s3, s3, bucket_name, url, object_key))
+        for future in as_completed(futures):
+            try:
+                uploaded_url, uploaded_key = future.result()
+                print(f"Uploaded: {uploaded_url} -> s3://{bucket_name}/{uploaded_key}")
+            except Exception as e:
+                print(f"Error uploading: {e}")
+
+if __name__ == "__main__":
+    main()
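
For reference, a typical invocation of the new script might look like the following; the bucket name, keys, and account ID are placeholders, and `--versions-file` can be omitted since it defaults to `versions.txt`:

```
python move.py \
  --bucket builds \
  --concurrency 16 \
  --versions-file versions.txt \
  --access-key <R2_ACCESS_KEY> \
  --secret-key <R2_SECRET_KEY> \
  --endpoint-url https://<accountid>.r2.cloudflarestorage.com
```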