For migrating the S3 storage, the following Python 3 script by @schnetqh can be used:
#!/usr/bin/env python
import boto3
# source credentials
url_src = "https://source-server.net"
access_key_src = "source-access-key"
secret_key_src = "secret-source-secret"
# destination credentials
url_dst = "https://destination-server.org"
access_key_dst = "destination-access-key"
secret_key_dst = "secret-destination-secret"
# Bucket prefix (same for source and destination)
bucket_prefix = "some-bucket-prefix"
# Create S3 clients with the specified endpoint URLs and credentials
s3_src = boto3.client(
    "s3",
    endpoint_url=url_src,
    aws_access_key_id=access_key_src,
    aws_secret_access_key=secret_key_src,
)
s3_dst = boto3.client(
    "s3",
    endpoint_url=url_dst,
    aws_access_key_id=access_key_dst,
    aws_secret_access_key=secret_key_dst,
)
def sync_bucket(bucket):
    # list bucket objects (paginated, since a single list call returns at most 1000 keys)
    # TODO: use delimiter and stuff
    # TODO: check if delimiters are broken
    paginator = s3_src.get_paginator("list_objects_v2")
    # iterate over objects
    print(f"Upload objects from BUCKET: {bucket}")
    s3_dst.create_bucket(Bucket=bucket)
    for page in paginator.paginate(Bucket=bucket):
        for obj in page.get("Contents", []):
            print(f'Upload content from {obj["Key"]}')
            # get object data from the source
            data = s3_src.get_object(Bucket=bucket, Key=obj["Key"])
            content = data["Body"].read()
            # put object data into the destination
            s3_dst.put_object(Bucket=bucket, Key=obj["Key"], Body=content)
            # TODO: compare etag
if __name__ == "__main__":
    for bucket in s3_src.list_buckets()["Buckets"]:
        if bucket["Name"].startswith(bucket_prefix):
            print(f'Start copy of {bucket["Name"]}')
            sync_bucket(bucket["Name"])
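To address the "compare etag" TODO above, a minimal verification sketch could look like the following. It reuses the s3_src and s3_dst clients defined above and assumes the objects were uploaded in a single part (multipart uploads produce ETags that are not plain MD5 checksums, so they cannot be compared this way):
def verify_bucket(bucket):
    # Compare the ETag of every source object with its copy in the destination.
    # Only meaningful for single-part uploads; multipart uploads yield ETags
    # that are not plain MD5 checksums.
    paginator = s3_src.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket):
        for obj in page.get("Contents", []):
            dst_obj = s3_dst.head_object(Bucket=bucket, Key=obj["Key"])
            if dst_obj["ETag"] != obj["ETag"]:
                print(f'ETag mismatch for {obj["Key"]}')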
One critical detail: when using S3 on our Ceph cluster, S3_REGION in the .env file must be set to rub-zg, otherwise uploading new files will fail. This appears to be a technical limitation of the S3 library that Ruby/RDMS/Hyrax is using (in theory, it should be possible for an S3 client to find out the default region of a bucket by itself).