How to upload files to shared team S3 storage
A DeepESDL example notebook
This notebook demonstrates how to upload files to the shared team S3 storage and how to access them afterwards with xcube.
Please also refer to the DeepESDL documentation and visit the platform's website for further information!
Brockmann Consult, 2024
This notebook runs with the Python environment deepesdl-xcube-1.7.0; please check out the documentation for help on changing the environment.
# needed for uploading files to s3 storage
import os
import boto3
from botocore.exceptions import NoCredentialsError
# needed for accessing the uploaded files
from xcube.core.store import new_data_store
Get the environment variables that are needed for the later configuration of the S3 store:
S3_USER_STORAGE_KEY = os.environ["S3_USER_STORAGE_KEY"]
S3_USER_STORAGE_SECRET = os.environ["S3_USER_STORAGE_SECRET"]
S3_USER_STORAGE_BUCKET = os.environ["S3_USER_STORAGE_BUCKET"]
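As a quick, optional sanity check (not part of the original setup) you can print the bucket name to confirm that the variables were picked up; never print the key or the secret:
# optional: confirm which bucket will be used (do not print the key or secret)
print(S3_USER_STORAGE_BUCKET)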
Connect to your team storage in S3:
# Note: if you upload the data with a prefix, you will need the parameter max_depth=2 (see below)
store = new_data_store("s3",
                       root=S3_USER_STORAGE_BUCKET,
                       storage_options=dict(anon=False,
                                            key=S3_USER_STORAGE_KEY,
                                            secret=S3_USER_STORAGE_SECRET))
You can check which data formats are supported by the xcube S3 store. This way you can find out which kinds of files you can easily store in the S3 team storage and then access again with xcube.
store.get_data_opener_ids()
('dataset:netcdf:s3', 'dataset:zarr:s3', 'dataset:levels:s3', 'mldataset:levels:s3', 'dataset:geotiff:s3', 'mldataset:geotiff:s3', 'geodataframe:shapefile:s3', 'geodataframe:geojson:s3')
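For example, you can check programmatically whether a particular format, such as GeoTIFF, has an opener in this store; the following is a minimal sketch based on the opener IDs listed above, and the variable name is purely illustrative:
# check whether any opener for GeoTIFF data is registered in the store
geotiff_supported = any("geotiff" in opener_id for opener_id in store.get_data_opener_ids())
geotiff_supported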
To upload files from your workspace to the shared team S3 storage, you must specify where your input files are:
input_datasets_dir = os.path.expanduser("~/<path-to-your-files>")
local_store = new_data_store("file",
                             root=input_datasets_dir)
list(local_store.get_data_ids())
[]
# function used to upload data to the team S3 storage
def upload_to_team_s3_bucket(local_file, bucket, s3_file):
    s3 = boto3.client('s3',
                      aws_access_key_id=S3_USER_STORAGE_KEY,
                      aws_secret_access_key=S3_USER_STORAGE_SECRET)
    try:
        s3.upload_file(local_file, bucket, s3_file)
        print(f"Upload successful for file {local_file}")
        return True
    except NoCredentialsError:
        print("Credentials not available")
        return False
# filter only for files in the directory
data_files = [file for file in os.listdir(input_datasets_dir)
              if os.path.isfile(os.path.join(input_datasets_dir, file))]
data_files
prefix = "input-datasets"  # giving a prefix, so a directory-like structure is created in S3
# looping through the datasets and uploading them to S3
for data_file in data_files:
    path = os.path.join(input_datasets_dir, data_file)
    target_path = f"{prefix}/{data_file}"
    upload_to_team_s3_bucket(path, S3_USER_STORAGE_BUCKET, target_path)
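If you want to verify the upload independently of xcube, you can list the objects under the prefix directly with boto3; this is a minimal sketch that reuses the credentials from above:
# list the keys that now exist under the chosen prefix in the team bucket
s3 = boto3.client('s3',
                  aws_access_key_id=S3_USER_STORAGE_KEY,
                  aws_secret_access_key=S3_USER_STORAGE_SECRET)
response = s3.list_objects_v2(Bucket=S3_USER_STORAGE_BUCKET, Prefix=prefix)
[obj["Key"] for obj in response.get("Contents", [])]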
Now let's check for the data: you need to instantiate an S3 data store pointing to your team storage bucket:
# Note: if you used a prefix when uploading the data, you need the parameter max_depth=2
store = new_data_store("s3",
                       max_depth=2,
                       root=S3_USER_STORAGE_BUCKET,
                       storage_options=dict(anon=False,
                                            key=S3_USER_STORAGE_KEY,
                                            secret=S3_USER_STORAGE_SECRET))
store.describe_data('input-datasets/sample01-geotiff.tif')
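Since the store lists a GeoTIFF opener, you can open the uploaded file as a dataset directly with xcube; this is a minimal sketch, assuming the sample file shown above has been uploaded:
# open the uploaded GeoTIFF as a dataset via the xcube S3 store
dataset = store.open_data('input-datasets/sample01-geotiff.tif')
dataset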
In case you wish to delete data:
store.delete_data('input-datasets/sample01-geotiff.tif')
store.delete_data('input-datasets/sample02-geotiff.tif')
list(store.get_data_ids())