Better moving system

Alessandro Autiero
2024-12-10 17:18:10 +01:00
parent 0cfa4af236
commit fdb1d694d9
3 changed files with 67 additions and 99 deletions

View File

@@ -3,6 +3,6 @@
Builds are stored on a Cloudflare R2 instance at `https://builds.rebootfn.org/versions.json`.
If you want to move them to another S3-compatible object storage service, run:
```
move.ps1
python move.py
```
and provide the required parameters.
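
For reference, a complete invocation might look like the sketch below; the flags are the ones defined by `move.py`'s argument parser, the placeholder values are illustrative, and `--versions-file` may be omitted when the URL list is in the default `versions.txt`:
```
python move.py --bucket <bucket-name> --concurrency 8 --access-key <ACCESS_KEY_ID> --secret-key <SECRET_ACCESS_KEY> --endpoint-url https://<account_id>.r2.cloudflarestorage.com --versions-file versions.txt
```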

View File

@@ -1,98 +0,0 @@
param(
    [Parameter(Mandatory=$true)]
    [string]$UrlListPath, # Path to a text file with one URL per line
    [Parameter(Mandatory=$true)]
    [string]$BucketName, # Name of the R2 bucket
    [Parameter(Mandatory=$true)]
    [string]$AccessKey, # Your R2 access key
    [Parameter(Mandatory=$true)]
    [string]$SecretKey, # Your R2 secret key
    [Parameter(Mandatory=$true)]
    [string]$EndPointURL, # Your R2 endpoint URL, e.g. https://<account_id>.r2.cloudflarestorage.com
    [Parameter(Mandatory=$false)]
    [int]$MaxConcurrentConnections = 16, # Number of concurrent connections for each file download
    [Parameter(Mandatory=$false)]
    [int]$SplitCount = 16, # Number of segments to split the download into
    [Parameter(Mandatory=$false)]
    [string]$AwsRegion = "auto" # Region; often "auto" works for R2, but can be set if needed
)
# Set AWS environment variables for this session
$Env:AWS_ACCESS_KEY_ID = $AccessKey
$Env:AWS_SECRET_ACCESS_KEY = $SecretKey
$Env:AWS_REGION = $AwsRegion # If required, or leave as "auto"
# Read all URLs from the file, skipping blank lines
$Urls = Get-Content $UrlListPath | Where-Object { $_ -and $_.Trim() -ne "" }
# Ensure aria2 is available
if (-not (Get-Command aria2c -ErrorAction SilentlyContinue)) {
    Write-Error "aria2c not found in PATH. Please install aria2."
    exit 1
}
# Ensure aws CLI is available
if (-not (Get-Command aws -ErrorAction SilentlyContinue)) {
    Write-Error "aws CLI not found in PATH. Please install AWS CLI."
    exit 1
}
function Process-Url {
    param(
        [string]$Url,
        [string]$BucketName,
        [string]$EndPointURL,
        [int]$MaxConcurrentConnections,
        [int]$SplitCount
    )
    # Extract the filename from the URL
    $FileName = Split-Path -Leaf $Url
    try {
        Write-Host "Downloading: $Url"
        # Use aria2c to download with multiple connections
        & aria2c `
            --max-connection-per-server=$MaxConcurrentConnections `
            --split=$SplitCount `
            --out=$FileName `
            --check-certificate=false `
            --header="Cookie: _c_t_c=1" `
            $Url
        if (!(Test-Path $FileName)) {
            Write-Host "Failed to download $Url"
            return
        }
        Write-Host "Uploading $FileName to R2 bucket: $BucketName"
        & aws s3 cp $FileName "s3://$BucketName/$FileName" --endpoint-url $EndPointURL
        if ($LASTEXITCODE -ne 0) {
            Write-Host "Failed to upload $FileName to R2"
            return
        }
        Write-Host "Upload successful. Deleting local file: $FileName"
        Remove-Item $FileName -Force
        Write-Host "Completed processing of $FileName."
    } catch {
        Write-Host "Error processing $Url"
        Write-Host $_
    }
}
# Process each URL sequentially here. If you'd like to run multiple URLs in parallel,
# you could replace the foreach loop with a ForEach-Object -Parallel block.
foreach ($Url in $Urls) {
    Process-Url -Url $Url -BucketName $BucketName -EndPointURL $EndPointURL -MaxConcurrentConnections $MaxConcurrentConnections -SplitCount $SplitCount
}
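# The foreach loop above runs one URL at a time. As a hedged sketch (not part of the
# original script), the ForEach-Object -Parallel alternative mentioned above could look
# roughly like this on PowerShell 7+; functions are not visible inside -Parallel
# runspaces, so Process-Url has to be re-created in each one via the $using: scope:
#
#   $ProcessUrlDef = ${function:Process-Url}.ToString()
#   $Urls | ForEach-Object -Parallel {
#       ${function:Process-Url} = $using:ProcessUrlDef
#       Process-Url -Url $_ -BucketName $using:BucketName -EndPointURL $using:EndPointURL `
#           -MaxConcurrentConnections $using:MaxConcurrentConnections -SplitCount $using:SplitCount
#   } -ThrottleLimit 4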

66 archive/move.py Normal file
View File

@@ -0,0 +1,66 @@
import argparse
import os
import requests
import boto3
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import urlparse
def upload_url_to_s3(s3_client, bucket_name, url, object_key):
    # Stream the remote file and upload it to the bucket without writing it to disk.
    # The cookie header and disabled TLS verification mirror the old move.ps1 settings.
    response = requests.get(url, stream=True, verify=False, headers={"Cookie": "_c_t_c=1"})
    response.raise_for_status()
    s3_client.upload_fileobj(response.raw, bucket_name, object_key)
    return url, object_key


def derive_key_from_url(url, prefix=None):
    # Use the last path segment of the URL as the object key, optionally under a prefix
    parsed = urlparse(url)
    filename = os.path.basename(parsed.path)
    if prefix:
        return f"{prefix}/{filename}"
    else:
        return filename


def main():
    parser = argparse.ArgumentParser(description="Upload multiple URLs from versions.txt to an S3 bucket concurrently.")
    parser.add_argument('--bucket', required=True, help="Name of the S3 bucket.")
    parser.add_argument('--concurrency', required=True, type=int, help="Number of concurrent uploads.")
    parser.add_argument('--versions-file', default='versions.txt', help="File containing one URL per line.")
    parser.add_argument('--access-key', required=True, help="AWS Access Key ID.")
    parser.add_argument('--secret-key', required=True, help="AWS Secret Access Key.")
    parser.add_argument('--endpoint-url', required=True, help="Custom endpoint URL for S3 or S3-compatible storage.")
    args = parser.parse_args()

    bucket_name = args.bucket
    concurrency = args.concurrency
    versions_file = args.versions_file
    access_key = args.access_key
    secret_key = args.secret_key
    endpoint_url = args.endpoint_url

    # Read one URL per line, skipping blank lines
    with open(versions_file, 'r') as f:
        urls = [line.strip() for line in f if line.strip()]
    print(f"Uploading {len(urls)} files...")

    # Build the boto3 client from the credentials and endpoint given on the command line
    s3_params = {}
    if access_key and secret_key:
        s3_params['aws_access_key_id'] = access_key
        s3_params['aws_secret_access_key'] = secret_key
    if endpoint_url:
        s3_params['endpoint_url'] = endpoint_url
    s3 = boto3.client('s3', **s3_params)

    # Upload the URLs concurrently and report each result as it completes
    futures = []
    with ThreadPoolExecutor(max_workers=concurrency) as executor:
        for url in urls:
            object_key = derive_key_from_url(url)
            futures.append(executor.submit(upload_url_to_s3, s3, bucket_name, url, object_key))
        for future in as_completed(futures):
            try:
                uploaded_url, uploaded_key = future.result()
                print(f"Uploaded: {uploaded_url} -> s3://{bucket_name}/{uploaded_key}")
            except Exception as e:
                print(f"Error uploading: {e}")


if __name__ == "__main__":
    main()