Cloud Data Access

Important

Running these example recipes requires an installation that includes the optional aws dependencies.

Access a model hosted on s3

import pyTMD

# s3 bucket hosting the model files and the name of the model to access
bucket = "pytmd-scratch"
model_name = "GOT5.5"
# https url of the bucket in the us-west-2 region
bucket_url = f"https://{bucket}.s3.us-west-2.amazonaws.com"
# create a model object pointed at the bucket url, then
# look up the named model in the model database
m = pyTMD.io.model(bucket_url)
m = m.from_database(model_name)
# open the "z" group of the tide model with automatic chunking
ds = m.open_dataset(group="z", chunks="auto")

Save model to a zarr store

import pyTMD
import zarr
import obstore

# s3 bucket that will hold the zarr store and the model to save
bucket = "pytmd-scratch"
model_name = "FES2022"
# look up the named model in the model database
m = pyTMD.io.model().from_database(model_name)
# read the tide model as an xarray DataTree and gap-fill
# missing data with inpainting (N=100)
# NOTE(review): confirm that open_datatree accepts the model name
# as a positional argument
dtree = m.open_datatree(model_name).tmd.inpaint(N=100)

# s3 url of the output zarr store, named after the model
zarr_url = f"s3://{bucket}/{m.name}.zarr"
# writable zarr store backed by an obstore S3Store
store = zarr.storage.ObjectStore(
    obstore.store.S3Store.from_url(zarr_url, region="us-west-2"),
    read_only=False,
)
# write the DataTree to the store as zarr format 3
dtree.to_zarr(store, mode="w", zarr_format=3, consolidated=True)

Predict tides from model hosted on s3

import zarr
import pyTMD
import obstore
import timescale
import pandas as pd
import xarray as xr

# s3 bucket hosting the zarr store and the model to use
bucket = "pytmd-scratch"
model_name = "FES2022"
# look up the named model in the model database (with verify=False)
m = pyTMD.io.model(verify=False).from_database(model_name)

# s3 url of the model's zarr store
zarr_url = f"s3://{bucket}/{m.name}.zarr"
# obstore S3Store for the url (created with skip_signature=True)
s3_store = obstore.store.S3Store.from_url(
    zarr_url,
    region="us-west-2",
    skip_signature=True,
)
# the data is only read here, so open the zarr store as read-only
store = zarr.storage.ObjectStore(s3_store, read_only=True)

# open the "z" group of the zarr store as an xarray Dataset
ds = xr.open_zarr(store, group="z", zarr_format=3)

# load the input points from parquet
df = pd.read_parquet("pytmd-test.parquet")
# convert the delta times (GPS standard, epoch 2018-01-01)
ts = timescale.from_deltatime(df.time, epoch=(2018, 1, 1), standard="GPS")

# build xarray DataArrays of the point coordinates (given in
# EPSG:3031) in the crs of the model
x, y = ds.tmd.coords_as(df.x, df.y, type="trajectory", crs=3031)
# linearly interpolate the model to the points
local = ds.tmd.interp(x, y, method="linear")

# keyword arguments shared by the prediction and inference calls
tide_kwargs = dict(deltat=ts.tt_ut1, corrections=m.corrections)
# name of the output column
column = m.z.variable
# predict tides from the model constituents
df[column] = local.tmd.predict(ts.tide, **tide_kwargs)
# add the inferred minor constituents
df[column] += local.tmd.infer(ts.tide, **tide_kwargs)

# write the points with the predicted tides to parquet
df.to_parquet(f"{m.name}.parquet")

Use s3fs to setup a zarr store

The above examples use obstore to access a zarr store on AWS s3. The s3fs package is an alternative method that uses an fsspec-based store.

import pyTMD
import zarr
import s3fs

# s3 bucket hosting the zarr store and the model to use
bucket = "pytmd-scratch"
model_name = "FES2022"
# look up the named model in the model database
m = pyTMD.io.model().from_database(model_name)

# path of the model's zarr store within the bucket
zarr_path = f"{bucket}/{m.name}.zarr"
# asynchronous s3fs filesystem (anon=False)
fs = s3fs.S3FileSystem(anon=False, asynchronous=True)
# fsspec-based zarr store for the path on the s3fs filesystem
store = zarr.storage.FsspecStore(fs, path=zarr_path)