Source code for pyTMD.io.OTIS

#!/usr/bin/env python
"""
OTIS.py
Written by Tyler Sutterley (04/2026)

Reads OTIS format tidal solutions provided by Oregon State University and ESR
    http://volkov.oce.orst.edu/tides/region.html
    https://www.esr.org/research/polar-tide-models/list-of-polar-tide-models/
    ftp://ftp.esr.org/pub/datasets/tmd/

PYTHON DEPENDENCIES:
    numpy: Scientific Computing Tools For Python
        https://numpy.org
        https://numpy.org/doc/stable/user/numpy-for-matlab-users.html
    pyproj: Python interface to PROJ library
        https://pypi.org/project/pyproj/
        https://pyproj4.github.io/pyproj/
    xarray: N-D labeled arrays and datasets in Python
        https://docs.xarray.dev/en/stable/

UPDATE HISTORY:
    Updated 04/2026: compact subaccessor should be to dataset
        added lineage attributes to save model filename(s)
    Updated 02/2026: make dataset and datatree accessors for OTIS
        be subaccessors from dataset module
    Updated 01/2026: check if flexure variable exists in TMD3 files
    Updated 12/2025: no longer subclassing pathlib.Path for working directories
    Updated 11/2025: near-complete rewrite of program to use xarray
    Updated 10/2025: refactored binary read programs
        added option to use memory mapping for reading large files
    Updated 08/2025: use numpy degree to radian conversions
        added option to gap fill when reading constituent grids
    Updated 05/2025: added option to select constituents to read from model
    Updated 12/2024: released version of TMD3 has different variable names
    Updated 11/2024: expose buffer distance for cropping tide model data
    Updated 10/2024: save latitude and longitude to output constituent object
        fix error when using default bounds in extract_constants
    Updated 09/2024: using new JSON dictionary format for model projections
    Updated 08/2024: revert change and assume crop bounds are projected
    Updated 07/2024: added crop and bounds keywords for trimming model data
        convert the crs of bounds when cropping model data
    Updated 06/2024: change int32 to int to prevent overflows with numpy 2.0
    Updated 02/2024: don't overwrite hu and hv in _interpolate_zeta
        changed variable for setting global grid flag to is_global
    Updated 01/2024: construct currents masks differently if not global
        renamed currents masks and bathymetry interpolation functions
    Updated 12/2023: use new crs class for coordinate reprojection
    Updated 10/2023: fix transport variable entry for TMD3 models
    Updated 09/2023: prevent overwriting ATLAS compact x and y coordinates
    Updated 08/2023: changed ESR netCDF4 format to TMD3 format
    Updated 04/2023: using pathlib to define and expand tide model paths
    Updated 03/2023: add basic variable typing to function inputs
        new function name for converting coordinate reference systems
    Updated 12/2022: refactor tide read programs under io
        new functions to read and interpolate from constituents class
        refactored interpolation routines into new module
    Updated 11/2022: place some imports within try/except statements
        fix variable reads for ATLAS compact data formats
        use f-strings for formatting verbose or ascii output
    Updated 10/2022: invert current tide masks to be True for invalid points
    Updated 06/2022: unit updates in the ESR netCDF4 format
    Updated 05/2022: add functions for using ESR netCDF4 format models
        changed keyword arguments to camel case
    Updated 04/2022: updated docstrings to numpy documentation format
        use longcomplex data format to be windows compliant
    Updated 03/2022: invert tide mask to be True for invalid points
        add separate function for resampling ATLAS compact global model
        decode ATLAS compact constituents for Python3 compatibility
        reduce iterative steps when combining ATLAS local models
    Updated 02/2022: use ceiling of masks for interpolation
    Updated 07/2021: added checks that tide model files are accessible
    Updated 06/2021: fix tidal currents for bilinear interpolation
        check for nan points when reading elevation and transport files
    Updated 05/2021: added option for extrapolation cutoff in kilometers
    Updated 03/2021: add extrapolation check where there are no invalid points
        prevent ComplexWarning for fill values when calculating amplitudes
        can read from single constituent TPXO9 ATLAS binary files
        replaced numpy bool/int to prevent deprecation warnings
    Updated 02/2021: set invalid values to nan in extrapolation
        replaced numpy bool to prevent deprecation warning
    Updated 12/2020: added valid data extrapolation with nearest_extrap
    Updated 09/2020: set bounds error to false for regular grid interpolations
        adjust dimensions of input coordinates to be iterable
        use masked arrays with atlas models and grids. make 2' grid with nearest
    Updated 08/2020: check that interpolated points are within range of model
        replaced griddata interpolation with scipy regular grid interpolators
    Updated 07/2020: added function docstrings. separate bilinear interpolation
        update griddata interpolation. changed type variable to keyword argument
    Updated 06/2020: output currents as numpy masked arrays
        use argmin and argmax in bilinear interpolation
    Updated 11/2019: interpolate heights and fluxes to numpy masked arrays
    Updated 09/2019: output as numpy masked arrays instead of nan-filled arrays
    Updated 01/2019: decode constituents for Python3 compatibility
    Updated 08/2018: added option grid for using ATLAS outputs that
        combine both global and localized tidal solutions
        added multivariate spline interpolation option
    Updated 07/2018: added different interpolation methods
    Updated 09/2017: Adapted for Python
"""

from __future__ import division, annotations

import pyproj
import pathlib
import warnings
import numpy as np
import xarray as xr
import pyTMD.utilities
from .dataset import (
    combine_attrs,
    register_dataset_subaccessor,
    register_datatree_subaccessor,
)

# attempt imports
dask = pyTMD.utilities.import_dependency("dask")
dask_available = pyTMD.utilities.dependency_available("dask")

# suppress warnings
warnings.filterwarnings("ignore", category=UserWarning)

__all__ = [
    "open_dataset",
    "open_mfdataset",
    "open_otis_dataset",
    "open_atlas_dataset",
    "open_tmd3_dataset",
    "open_otis_grid",
    "open_otis_elevation",
    "open_otis_transport",
    "open_atlas_grid",
    "open_atlas_elevation",
    "open_atlas_transport",
    "read_raw_binary",
    "write_raw_binary",
    "OTISDataset",
    "OTISDataTree",
    "CompactDataset",
]

# variable attributes
_attributes = dict(
    z=dict(
        bathymetry=dict(
            standard_name="ocean_depth",
            long_name="ocean depth",
            units="m",
        ),
        mask=dict(
            standard_name="sea_binary_mask",
            long_name="land-sea mask",
            units="1",
        ),
        elevation=dict(
            standard_name="tide_height",
            long_name="tidal_elevation",
            units="m",
        ),
    ),
    u=dict(
        bathymetry=dict(
            standard_name="ocean_depth",
            long_name="ocean depth at u-locations",
            units="m",
        ),
        mask=dict(
            standard_name="sea_binary_mask",
            long_name="land-sea mask at u-locations",
            units="1",
        ),
        current=dict(
            standard_name="eastward_tidal_current",
            long_name="zonal tidal currents on u-grid",
            units="m/s",
        ),
        transport=dict(
            standard_name="eastward_tidal_transport",
            long_name="zonal tidal transports on u-grid",
            units="m**2/s",
        ),
    ),
    v=dict(
        bathymetry=dict(
            standard_name="ocean_depth",
            long_name="ocean depth at v-locations",
            units="m",
        ),
        mask=dict(
            standard_name="sea_binary_mask",
            long_name="land-sea mask at v-locations",
            units="1",
        ),
        current=dict(
            standard_name="northward_tidal_current",
            long_name="meridional tidal currents on v-grid",
            units="m/s",
        ),
        transport=dict(
            standard_name="northward_tidal_transport",
            long_name="meridional tidal transports on v-grid",
            units="m**2/s",
        ),
    ),
)


# PURPOSE: read tide model files
[docs] def open_dataset( model_file: str | list | pathlib.Path, grid_file: str | pathlib.Path | None = None, format: str = "OTIS", **kwargs, ): """ Open OTIS/ATLAS/TMD3 model files Parameters ---------- model_file: str or pathlib.Path Input model file grid_file: str, pathlib.Path or None, default None Input model grid file format: str, default 'OTIS' Model format - ``'ATLAS'`` - ``'OTIS'`` - ``'TMD3'`` group: str, default 'z' Tidal variable to read - ``'z'``: heights - ``'u'``: zonal currents - ``'U'``: zonal depth-averaged transport - ``'v'``: meridional currents - ``'V'``: meridional depth-averaged transport kwargs: dict Additional keyword arguments for opening files Returns ------- ds: xarray.Dataset Tide model data """ # set default keyword arguments kwargs.setdefault("group", "z") group = kwargs.get("group").lower() # open file(s) as xarray dataset if format == "OTIS": # OTIS (single or multi-file) ds = open_otis_dataset(model_file, grid_file=grid_file, **kwargs) elif format == "ATLAS": # ATLAS-compact ds = open_atlas_dataset(model_file, grid_file=grid_file, **kwargs) elif format == "TMD3": # TMD3 netCDF4 ds = open_tmd3_dataset(model_file, **kwargs) # add attributes ds.attrs["format"] = format # convert transports to currents if necessary if kwargs["group"] in ("u", "v"): # convert transports to currents and update attributes for c in ds.tmd.constituents: ds[c] /= ds["bathymetry"] ds[c].attrs.update(_attributes[group]["current"]) # return xarray dataset return ds
# PURPOSE: read a list of model files
[docs] def open_mfdataset( model_files: list[str] | list[pathlib.Path], group: str = "z", **kwargs, ): """ Open multiple OTIS model files Parameters ---------- model_files: list of str or pathlib.Path List of OTIS model files group: str, default 'z' Tidal variable to read - ``'z'``: heights - ``'u'``: zonal currents - ``'U'``: zonal depth-averaged transport - ``'v'``: meridional currents - ``'V'``: meridional depth-averaged transport parallel: bool, default False Open files in parallel using ``dask.delayed`` kwargs: dict Additional keyword arguments for opening OTIS files Returns ------- ds: xarray.Dataset OTIS tide model data """ # set default keyword arguments kwargs.setdefault("parallel", False) parallel = kwargs.get("parallel") and dask_available # read each file as xarray dataset and append to list if (group == "z") and parallel: # elevations opener = dask.delayed(open_otis_elevation) (d,) = dask.compute([opener(f, **kwargs) for f in model_files]) elif group == "z": # elevations d = [open_otis_elevation(f, **kwargs) for f in model_files] elif group in ("u", "U") and parallel: # transports are returned as (u,v) opener = dask.delayed(open_otis_transport) (d,) = dask.compute([opener(f, **kwargs)[0] for f in model_files]) elif group in ("u", "U"): # transports are returned as (u,v) d = [open_otis_transport(f, **kwargs)[0] for f in model_files] elif group in ("v", "V") and parallel: # transports are returned as (u,v) opener = dask.delayed(open_otis_transport) (d,) = dask.compute([opener(f, **kwargs)[1] for f in model_files]) elif group in ("v", "V"): # transports are returned as (u,v) d = [open_otis_transport(f, **kwargs)[1] for f in model_files] # merge datasets ds = xr.merge(d, combine_attrs=combine_attrs, compat="override") # add attributes ds.attrs["group"] = group.upper() if group in ("u", "v") else group # return xarray dataset return ds
[docs] def open_otis_dataset( model_file: str | list | pathlib.Path, grid_file: str | pathlib.Path, group: str | None = "z", **kwargs, ): """ Open OTIS model files Parameters ---------- model_file: str, list or pathlib.Path Input model file(s) grid_file: str, pathlib.Path Input model grid file group: str, default 'z' Tidal variable to read - ``'z'``: heights - ``'u'``: zonal currents - ``'U'``: zonal depth-averaged transport - ``'v'``: meridional currents - ``'V'``: meridional depth-averaged transport crs: int, str or dict, default 4326 Coordinate reference system for the model data use_mmap: bool, default False Use memory mapping to read data Returns ------- ds: xarray.Dataset OTIS tide model data """ # default coordinate reference system crs = kwargs.get("crs", 4326) # open grid file ds1 = open_otis_grid(grid_file, **kwargs) # add attributes ds1.attrs["crs"] = pyproj.CRS.from_user_input(crs).to_dict() # open model file(s) if isinstance(model_file, list): # multi-file datasets ds2 = open_mfdataset(model_file, group=group, **kwargs) elif group == "z": # elevations ds2 = open_otis_elevation(model_file, **kwargs) elif group in ("u", "U"): # transports are returned as (u,v) ds2 = open_otis_transport(model_file, **kwargs)[0] elif group in ("v", "V"): # transports are returned as (u,v) ds2 = open_otis_transport(model_file, **kwargs)[1] # merge datasets ds = OTISDataset(ds1).merge(ds2, group=group) # add attributes ds.attrs["group"] = group.upper() if group in ("u", "v") else group # return xarray dataset return ds
[docs] def open_atlas_dataset( model_file: str | pathlib.Path, grid_file: str | pathlib.Path, group: str | None = "z", chunks: int | dict | str | None = None, use_mmap: bool = False, **kwargs, ): """ Open ATLAS model files Parameters ---------- model_file: str or pathlib.Path Input model file grid_file: str, pathlib.Path Input model grid file group: str, default 'z' Tidal variable to read - ``'z'``: heights - ``'u'``: zonal currents - ``'U'``: zonal depth-averaged transport - ``'v'``: meridional currents - ``'V'``: meridional depth-averaged transport crs: int, str or dict, default 4326 Coordinate reference system for the model data chunks: int, dict, str, or None, default None Coerce output to specified chunks use_mmap: bool, default False Use memory mapping to read data Returns ------- ds: xarray.Dataset ATLAS tide model data """ # default coordinate reference system crs = kwargs.get("crs", 4326) # open grid file dsg, dtg = open_atlas_grid(grid_file, use_mmap=use_mmap) ds1 = CompactDataset(dsg).combine_local(dtg, chunks=chunks) # add attributes ds1.attrs["crs"] = pyproj.CRS.from_user_input(crs).to_dict() # open model file(s) if group == "z": # elevations are returned as (z, localz) dsh, dth = open_atlas_elevation(model_file, use_mmap=use_mmap) ds2 = CompactDataset(dsh).combine_local(dth, chunks=chunks) elif group in ("u", "U"): # transports are returned as (u, v, localu, localv) dsu, dtu, dsv, dtv = open_atlas_transport(model_file, use_mmap=use_mmap) ds2 = CompactDataset(dsu).combine_local(dtu, chunks=chunks) elif group in ("v", "V"): # transports are returned as (u, v, localu, localv) dsu, dtu, dsv, dtv = open_atlas_transport(model_file, use_mmap=use_mmap) ds2 = CompactDataset(dsv).combine_local(dtv, chunks=chunks) # merge datasets ds = xr.merge([ds1, ds2], combine_attrs=combine_attrs, compat="override") # add attributes ds.attrs["group"] = group.upper() if group in ("u", "v") else group # return xarray dataset return ds
# PURPOSE: read TMD3 netCDF4 files
[docs] def open_tmd3_dataset( input_file: str | pathlib.Path, group: str | None = "z", chunks: int | dict | str | None = None, **kwargs, ): """ Open TMD3-formatted netCDF4 files Parameters ---------- input_file: str or pathlib.Path Input TMD3 netCDF4 file group: str, default 'z' Tidal variable to read - ``'z'``: heights - ``'u'``: zonal currents - ``'U'``: zonal depth-averaged transport - ``'v'``: meridional currents - ``'V'``: meridional depth-averaged transport chunks: int, dict, str, or None, default None Variable chunk sizes for dask (see ``xarray.open_dataset``) Returns ------- ds: xarray.Dataset TMD3 tide model data """ # tilde-expand input file input_file = pyTMD.utilities.Path(input_file).resolve() if isinstance(input_file, pathlib.Path) and not input_file.exists(): raise FileNotFoundError(f"File not found: {input_file}") # read the netCDF4-format tide grid file tmp = xr.open_dataset(input_file, mask_and_scale=True, chunks=chunks) # replace constituents array with names constituents = tmp.constituents.attrs["constituent_order"].split() tmp["constituents"] = constituents # coordinate reference system spatial_proj4 = tmp.mapping.attrs.get("spatial_proj4", 4326) # flip y orientation to be monotonically increasing tmp = tmp.reindex(y=tmp.y[::-1]) # convert imaginary component to negative to match convention # get units attributes for model group if group == "z": ds = (tmp["hRe"] + -1j * tmp["hIm"]).to_dataset(dim="constituents") units = tmp["hRe"].attrs.get("units") elif group in ("U", "u"): ds = (tmp["URe"] + -1j * tmp["UIm"]).to_dataset(dim="constituents") units = tmp["URe"].attrs.get("units") elif group in ("V", "v"): ds = (tmp["VRe"] + -1j * tmp["VIm"]).to_dataset(dim="constituents") units = tmp["VRe"].attrs.get("units") # read water column thickness, mask and flexure ds["mask"] = tmp["mask"] # convert bathymetry to float and rename to match ds["bathymetry"] = tmp.wct.astype("f") # convert flexure from percent to scale factor if hasattr(tmp, "flexure"): ds["flexure"] = tmp.flexure.astype("f") / 100.0 # add attributes for con in ds.data_vars: ds[con].attrs["units"] = units ds.attrs["crs"] = pyproj.CRS.from_user_input(spatial_proj4).to_dict() ds.attrs["group"] = group.upper() if group in ("u", "v") else group ds.attrs["lineage"] = pathlib.Path(input_file).name # return xarray dataset return ds
# PURPOSE: read OTIS grid files
[docs] def open_otis_grid( input_file: str | pathlib.Path, chunks: int | dict | str | None = None, use_mmap: bool = False, **kwargs, ): """ Open OTIS model grid files Parameters ---------- input_file: str or pathlib.Path Input OTIS grid file chunks: int, dict, str, or None, default None Coerce output to specified chunks use_mmap: bool, default False Use memory mapping to read data Returns ------- ds: xarray.Dataset OTIS grid data """ # tilde-expand input file input_file = pyTMD.utilities.Path(input_file).resolve() if isinstance(input_file, pathlib.Path) and not input_file.exists(): raise FileNotFoundError(f"File not found: {input_file}") # set initial offset (skip 4 bytes) offset = 4 # read data as big endian # get model dimensions nx, ny = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(2,), use_mmap=use_mmap, offset=offset, ) offset += 2 * 4 # extract x and y limits (could be latitude and longitude) ylim = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(2,), use_mmap=use_mmap, offset=offset, ) offset += 2 * 4 xlim = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(2,), use_mmap=use_mmap, offset=offset, ) offset += 2 * 4 # read dt from file (dt,) = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(1,), use_mmap=use_mmap, offset=offset, ) offset += 4 # convert longitudinal limits (if x == longitude) if (xlim[0] < 0) & (xlim[1] < 0) & (dt > 0): xlim += 360.0 # grid bounding box (xmin, xmax, ymin, ymax) bounds = np.array([*xlim, *ylim], dtype=">f4") # x and y coordinate spacing dx = (xlim[1] - xlim[0]) / nx dy = (ylim[1] - ylim[0]) / ny # create x and y arrays arrays x = np.linspace(xlim[0] + dx / 2.0, xlim[1] - dx / 2.0, nx) y = np.linspace(ylim[0] + dy / 2.0, ylim[1] - dy / 2.0, ny) # read nob from file (nob,) = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(1,), use_mmap=use_mmap, offset=offset, ) offset += 4 # skip 8 bytes offset += 8 # read iob from file if nob == 0: iob = [] offset += 4 else: iob = read_raw_binary( input_file, dtype=">i4", shape=(nob, 2), use_mmap=use_mmap, offset=offset, order="C", ) offset += 2 * 4 * nob # skip 8 bytes offset += 8 # read hz matrix hz = read_raw_binary( input_file, dtype=">f4", shape=(ny, nx), use_mmap=use_mmap, offset=offset, order="C", ) offset += 4 * nx * ny # skip 8 bytes offset += 8 # read mz matrix (1: wet point, 0: dry point) mz = read_raw_binary( input_file, dtype=">i4", shape=(ny, nx), use_mmap=use_mmap, offset=offset, order="C", ) offset += 4 * nx * ny # update mask for cases where bathymetry is zero or negative mz = np.minimum(mz, (hz > 0)) # data dictionary grid = dict(dims=("y", "x"), coords={}, data_vars={}) grid["coords"]["y"] = dict(data=y.copy(), dims="y") grid["coords"]["x"] = dict(data=x.copy(), dims="x") for field in ["bathymetry", "mask"]: grid["data_vars"][field] = dict(dims=("y", "x")) # store the data grid["data_vars"]["bathymetry"]["data"] = hz grid["data_vars"]["mask"]["data"] = mz # convert to xarray Dataset from the data dictionary ds = xr.Dataset.from_dict(grid) # coerce to specified chunks if chunks is not None: ds = ds.chunk(chunks) # add attributes ds.attrs["dt"] = dt.copy() ds.attrs["iob"] = iob.copy() ds.attrs["bounds"] = bounds.copy() ds.attrs["lineage"] = pathlib.Path(input_file).name for field in ["mask", "bathymetry"]: ds[field].attrs.update(_attributes["z"][field]) # return xarray dataset return ds
# PURPOSE: read OTIS elevation files
[docs] def open_otis_elevation( input_file: str | pathlib.Path, chunks: int | dict | str | None = None, use_mmap: bool = False, **kwargs, ): """ Read OTIS tidal elevation files Parameters ---------- input_file: str or pathlib.Path Input OTIS elevation file chunks: int, dict, str, or None, default None Coerce output to specified chunks use_mmap: bool, default False Use memory mapping to read data Returns ------- ds: xarray.Dataset OTIS tidal elevation data """ # tilde-expand input file input_file = pyTMD.utilities.Path(input_file).resolve() if isinstance(input_file, pathlib.Path) and not input_file.exists(): raise FileNotFoundError(f"File not found: {input_file}") # set initial offset offset = 0 # read data as big endian ll, nx, ny, nc = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(4,), use_mmap=use_mmap, offset=offset, ) offset += 4 * 4 # offset for x and y limits ylim = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(2,), use_mmap=use_mmap, offset=offset, ) offset += 2 * 4 xlim = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(2,), use_mmap=use_mmap, offset=offset, ) offset += 2 * 4 # grid bounding box (xmin, xmax, ymin, ymax) bounds = np.array([*xlim, *ylim], dtype=">f4") # x and y coordinate spacing dx = (xlim[1] - xlim[0]) / nx dy = (ylim[1] - ylim[0]) / ny # create x and y arrays arrays x = np.linspace(xlim[0] + dx / 2.0, xlim[1] - dx / 2.0, nx) y = np.linspace(ylim[0] + dy / 2.0, ylim[1] - dy / 2.0, ny) # read constituent name constituents = read_raw_binary( input_file, dtype="|S4", shape=(nc,), use_mmap=use_mmap, offset=offset, order="C", ) # add to offset offset += int(nc) * 4 # data dictionary h = dict(dims=("y", "x"), coords={}, data_vars={}) h["coords"]["y"] = dict(data=y.copy(), dims="y") h["coords"]["x"] = dict(data=x.copy(), dims="x") # read constituents from file for ic in range(nc): # get constituent name field = constituents[ic].decode("utf8").rstrip() h["data_vars"][field] = dict(dims=("y", "x")) # skip records to constituent offset += 8 # read elevations for constituent temp = read_raw_binary( input_file, dtype=">f4", shape=(ny, nx, 2), use_mmap=use_mmap, offset=offset, order="C", ) # real and imaginary components of elevation Z = np.ma.array(temp[:, :, 0] + 1j * temp[:, :, 1]) # update mask for nan values Z.mask = np.isnan(Z.data) | (np.abs(Z.data) == 0) # replace masked values with fill value Z.data[Z.mask] = Z.fill_value # store the data h["data_vars"][field]["data"] = Z # skip to next constituent offset += 4 * 2 * nx * ny # convert to xarray Dataset from the data dictionary ds = xr.Dataset.from_dict(h) # coerce to specified chunks if chunks is not None: ds = ds.chunk(chunks) # add attributes ds.attrs["bounds"] = bounds.copy() ds.attrs["lineage"] = pathlib.Path(input_file).name for field in ds.data_vars: ds[field].attrs.update(_attributes["z"]["elevation"]) # return xarray dataset return ds
# PURPOSE: read OTIS transport files
[docs] def open_otis_transport( input_file: str | pathlib.Path, chunks: int | dict | str | None = None, use_mmap: bool = False, **kwargs, ): """ Read OTIS tidal transport files Parameters ---------- input_file: str or pathlib.Path Input OTIS transport file chunks: int, dict, str, or None, default None Coerce output to specified chunks use_mmap: bool, default False Use memory mapping to read data Returns ------- dsu: xarray.Dataset OTIS zonal tidal transport data dsv: xarray.Dataset OTIS meridional tidal transport data """ # tilde-expand input file input_file = pyTMD.utilities.Path(input_file).resolve() if isinstance(input_file, pathlib.Path) and not input_file.exists(): raise FileNotFoundError(f"File not found: {input_file}") # set initial offset offset = 0 # read data as big endian ll, nx, ny, nc = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(4,), use_mmap=use_mmap, offset=offset, ) offset += 4 * 4 # offset for x and y limits ylim = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(2,), use_mmap=use_mmap, offset=offset, ) offset += 2 * 4 xlim = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(2,), use_mmap=use_mmap, offset=offset, ) offset += 2 * 4 # grid bounding box (xmin, xmax, ymin, ymax) bounds = np.array([*xlim, *ylim], dtype=">f4") # x and y coordinate spacing dx = (xlim[1] - xlim[0]) / nx dy = (ylim[1] - ylim[0]) / ny # create x and y arrays arrays x = np.linspace(xlim[0] + dx / 2.0, xlim[1] - dx / 2.0, nx) y = np.linspace(ylim[0] + dy / 2.0, ylim[1] - dy / 2.0, ny) # read constituents from file constituents = read_raw_binary( input_file, dtype="|S4", shape=(nc,), use_mmap=use_mmap, offset=offset, order="C", ) # add to offset offset += int(nc) * 4 # u and v dictionaries u = dict(dims=("y", "x"), coords={}, data_vars={}) v = dict(dims=("y", "x"), coords={}, data_vars={}) u["coords"]["y"] = dict(data=y.copy(), dims="y") u["coords"]["x"] = dict(data=x - dx / 2.0, dims="x") v["coords"]["y"] = dict(data=y - dy / 2.0, dims="y") v["coords"]["x"] = dict(data=x.copy(), dims="x") # read constituents from file for ic in range(nc): # get constituent name field = constituents[ic].decode("utf8").rstrip() u["data_vars"][field] = dict(dims=("y", "x")) v["data_vars"][field] = dict(dims=("y", "x")) # skip records to constituent offset += 8 # read elevations for constituent temp = read_raw_binary( input_file, dtype=">f4", shape=(ny, nx, 4), use_mmap=use_mmap, offset=offset, order="C", ) # real and imaginary components of transport U = np.ma.array(temp[:, :, 0] + 1j * temp[:, :, 1]) V = np.ma.array(temp[:, :, 2] + 1j * temp[:, :, 3]) # update mask for nan values U.mask = np.isnan(U.data) | (np.abs(U.data) == 0) V.mask = np.isnan(V.data) | (np.abs(V.data) == 0) # replace masked values with fill value U.data[U.mask] = U.fill_value V.data[V.mask] = V.fill_value # store the data u["data_vars"][field]["data"] = U v["data_vars"][field]["data"] = V # skip to next constituent offset += 4 * 4 * nx * ny # convert to xarray Datasets from the data dictionaries dsu = xr.Dataset.from_dict(u) dsv = xr.Dataset.from_dict(v) # coerce to specified chunks if chunks is not None: dsu = dsu.chunk(chunks) dsv = dsv.chunk(chunks) # add attributes dsu.attrs["bounds"] = bounds.copy() dsv.attrs["bounds"] = bounds.copy() dsu.attrs["lineage"] = pathlib.Path(input_file).name dsv.attrs["lineage"] = pathlib.Path(input_file).name for field in dsu.data_vars: dsu[field].attrs.update(_attributes["u"]["transport"]) for field in dsv.data_vars: dsv[field].attrs.update(_attributes["v"]["transport"]) # return xarray datasets return dsu, dsv
# PURPOSE: read ATLAS-compressed grid files
[docs] def open_atlas_grid( input_file: str | pathlib.Path, use_mmap: bool = False, **kwargs, ): """ Open ATLAS-compressed model grid files Parameters ---------- input_file: str or pathlib.Path Input ATLAS grid file use_mmap: bool, default False Use memory mapping to read data Returns ------- ds: xarray.Dataset ATLAS global grid data dtree: xarray.DataTree Local ATLAS grid solutions """ # tilde-expand input file input_file = pyTMD.utilities.Path(input_file).resolve() if isinstance(input_file, pathlib.Path) and not input_file.exists(): raise FileNotFoundError(f"File not found: {input_file}") # get file information file_info = input_file.stat() # set initial offset (skip 4 bytes) offset = 4 # read data as big endian # get model dimensions nx, ny = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(2,), use_mmap=use_mmap, offset=offset, ) offset += 2 * 4 # extract x and y limits (could be latitude and longitude) ylim = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(2,), use_mmap=use_mmap, offset=offset, ) offset += 2 * 4 xlim = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(2,), use_mmap=use_mmap, offset=offset, ) offset += 2 * 4 # read dt from file (dt,) = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(1,), use_mmap=use_mmap, offset=offset, ) offset += 4 # grid bounding box (xmin, xmax, ymin, ymax) bounds = np.array([*xlim, *ylim], dtype=">f4") # x and y coordinate spacing dx = (xlim[1] - xlim[0]) / nx dy = (ylim[1] - ylim[0]) / ny # create x and y arrays x = np.linspace(xlim[0] + dx / 2.0, xlim[1] - dx / 2.0, nx) y = np.linspace(ylim[0] + dy / 2.0, ylim[1] - dy / 2.0, ny) # read nob from file (nob,) = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(1,), use_mmap=use_mmap, offset=offset, ) offset += 4 # skip 8 bytes offset += 8 # read iob from file if nob == 0: iob = [] offset += 4 else: iob = read_raw_binary( input_file, dtype=">i4", shape=(nob, 2), use_mmap=use_mmap, offset=offset, order="C", ) offset += 2 * 4 * nob # skip 8 bytes offset += 8 # read hz matrix hz = read_raw_binary( input_file, dtype=">f4", shape=(ny, nx), use_mmap=use_mmap, offset=offset, order="C", ) offset += 4 * nx * ny # skip 8 bytes offset += 8 # read mz matrix (1: wet point, 0: dry point) mz = read_raw_binary( input_file, dtype=">i4", shape=(ny, nx), use_mmap=use_mmap, offset=offset, order="C", ) offset += 4 * nx * ny # update mask for cases where bathymetry is zero or negative mz = np.minimum(mz, (hz > 0)) # skip 8 bytes offset += 8 # read pmask matrix pmask = read_raw_binary( input_file, dtype=">i4", shape=(ny, nx), use_mmap=use_mmap, offset=offset, order="C", ) offset += 4 * nx * ny # skip 8 bytes offset += 8 # data dictionary grid = dict(dims=("y", "x"), coords={}, data_vars={}) grid["coords"]["y"] = dict(data=y.copy(), dims="y") grid["coords"]["x"] = dict(data=x.copy(), dims="x") for field in ["bathymetry", "mask", "local"]: grid["data_vars"][field] = dict(dims=("y", "x")) # store the data grid["data_vars"]["bathymetry"]["data"] = hz grid["data_vars"]["mask"]["data"] = mz grid["data_vars"]["local"]["data"] = pmask # convert to xarray Dataset from the data dictionary ds = xr.Dataset.from_dict(grid) # add attributes ds.attrs["dt"] = dt.copy() ds.attrs["iob"] = iob.copy() ds.attrs["bounds"] = bounds.copy() ds.attrs["lineage"] = pathlib.Path(input_file).name for field in ["mask", "bathymetry"]: ds[field].attrs.update(_attributes["z"][field]) # read local models nmod = 0 dtree = xr.DataTree() # while the file position is not at the end of file while offset < file_info.st_size: # add 1 to number of models nmod += 1 # get local model dimensions # and number of valid depth indices NX, NY, ND = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(3,), use_mmap=use_mmap, offset=offset, ) offset += 3 * 4 # extract local x and y limits YLIM = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(2,), use_mmap=use_mmap, offset=offset, ).astype(">f8") offset += 2 * 4 XLIM = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(2,), use_mmap=use_mmap, offset=offset, ).astype(">f8") offset += 2 * 4 # local bounding box (xmin, xmax, ymin, ymax) BOUNDS = np.array([*XLIM, *YLIM], dtype=">f4") # x and y coordinate spacing DX = (XLIM[1] - XLIM[0]) / NX DY = (YLIM[1] - YLIM[0]) / NY # create local x and y arrays X = np.linspace(XLIM[0] + DX / 2.0, XLIM[1] - DX / 2.0, NX) Y = np.linspace(YLIM[0] + DY / 2.0, YLIM[1] - DY / 2.0, NY) # extract region name temp = read_raw_binary( input_file, dtype="|S4", shape=(5,), use_mmap=use_mmap, offset=offset, order="C", ) name = b"".join(temp).decode("utf8").strip() offset += 5 * 4 # skip 8 bytes offset += 8 # extract local valid indices indx = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(ND,), use_mmap=use_mmap, offset=offset, ) offset += 4 * ND indy = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(ND,), use_mmap=use_mmap, offset=offset, ) offset += 4 * ND # reduce coordinates to valid points gridx, gridy = np.meshgrid(X, Y) XD = gridx[indy - 1, indx - 1] YD = gridy[indy - 1, indx - 1] # skip 8 bytes offset += 8 # extract depth for valid points depth = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(ND,), use_mmap=use_mmap, offset=offset, order="C", ) offset += 4 * ND # skip 8 bytes offset += 8 # save to dictionary local = dict(dims=("i",), coords={}, data_vars={}) local["data_vars"]["y"] = dict(data=YD.copy(), dims="i") local["data_vars"]["x"] = dict(data=XD.copy(), dims="i") for field in [ "bathymetry", ]: local["data_vars"][field] = dict(dims=("i",)) # store the data local["data_vars"]["bathymetry"]["data"] = depth # convert to xarray Dataset from the data dictionary dtree[name] = xr.Dataset.from_dict(local) dtree[name].attrs["bounds"] = BOUNDS.copy() dtree[name].attrs["lineage"] = pathlib.Path(input_file).name # return xarray dataset (global) and datatree (local) return (ds, dtree)
# PURPOSE: read ATLAS-compressed elevation files
[docs] def open_atlas_elevation( input_file: str | pathlib.Path, use_mmap: bool = False, **kwargs, ): """ Open ATLAS-compressed tidal elevation files Parameters ---------- input_file: str or pathlib.Path Input ATLAS elevation file use_mmap: bool, default False Use memory mapping to read data Returns ------- ds: xarray.Dataset ATLAS global tidal elevation data dtree: xarray.DataTree Local ATLAS tidal elevation solutions """ # tilde-expand input file input_file = pyTMD.utilities.Path(input_file).resolve() if isinstance(input_file, pathlib.Path) and not input_file.exists(): raise FileNotFoundError(f"File not found: {input_file}") # get file information file_info = input_file.stat() # set initial offset offset = 0 # read data as big endian ll, nx, ny, nc = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(4,), use_mmap=use_mmap, offset=offset, ) offset += 4 * 4 # offset for x and y limits ylim = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(2,), use_mmap=use_mmap, offset=offset, ) offset += 2 * 4 xlim = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(2,), use_mmap=use_mmap, offset=offset, ) offset += 2 * 4 # grid bounding box (xmin, xmax, ymin, ymax) bounds = np.array([*xlim, *ylim], dtype=">f4") # x and y coordinate spacing dx = (xlim[1] - xlim[0]) / nx dy = (ylim[1] - ylim[0]) / ny # create x and y arrays x = np.linspace(xlim[0] + dx / 2.0, xlim[1] - dx / 2.0, nx) y = np.linspace(ylim[0] + dy / 2.0, ylim[1] - dy / 2.0, ny) # read constituents from file constituents = read_raw_binary( input_file, dtype="|S4", shape=(nc,), use_mmap=use_mmap, offset=offset, order="C", ) # add to offset offset += int(nc) * 4 # data dictionary h = dict(dims=("y", "x"), coords={}, data_vars={}) h["coords"]["y"] = dict(data=y.copy(), dims="y") h["coords"]["x"] = dict(data=x.copy(), dims="x") # read constituents from file for ic in range(nc): # get constituent name field = constituents[ic].decode("utf8").rstrip() h["data_vars"][field] = dict(dims=("y", "x")) # skip records to constituent offset += 8 # read elevations for constituent temp = read_raw_binary( input_file, dtype=">f4", shape=(ny, nx, 2), use_mmap=use_mmap, offset=offset, order="C", ) # real and imaginary components of elevation Z = np.ma.array(temp[:, :, 0] + 1j * temp[:, :, 1]) # update mask for nan values Z.mask = np.isnan(Z.data) | (np.abs(Z.data) == 0) # replace masked values with fill value Z.data[Z.mask] = Z.fill_value # store the data h["data_vars"][field]["data"] = Z # skip to next constituent offset += 4 * 2 * nx * ny # convert to xarray Dataset from the data dictionary ds = xr.Dataset.from_dict(h) # add attributes ds.attrs["bounds"] = bounds.copy() ds.attrs["lineage"] = pathlib.Path(input_file).name for field in ds.data_vars: ds[field].attrs.update(_attributes["z"]["elevation"]) offset += 4 # read local models nmod = 0 dtree = xr.DataTree() # while the file position is not at the end of file while offset < file_info.st_size: # add 1 to number of models offset += 4 nmod += 1 # get local model dimensions, number of constituents # and number of valid elevation indices NX, NY, NC, NZ = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(4,), use_mmap=use_mmap, offset=offset, ) offset += 4 * 4 # extract local x and y limits YLIM = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(2,), use_mmap=use_mmap, offset=offset, ).astype(">f8") offset += 2 * 4 XLIM = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(2,), use_mmap=use_mmap, offset=offset, ).astype(">f8") offset += 2 * 4 # local bounding box (xmin, xmax, ymin, ymax) BOUNDS = np.array([*XLIM, *YLIM], dtype=">f4") # x and y coordinate spacing DX = (XLIM[1] - XLIM[0]) / NX DY = (YLIM[1] - YLIM[0]) / NY # create local x and y arrays X = np.linspace(XLIM[0] + DX / 2.0, XLIM[1] - DX / 2.0, NX) Y = np.linspace(YLIM[0] + DY / 2.0, YLIM[1] - DY / 2.0, NY) # extract constituents for localized solution CONSTITUENTS = read_raw_binary( input_file, dtype="|S4", shape=(NC,), use_mmap=use_mmap, offset=offset, order="C", ) offset += NC * 4 # extract region name temp = read_raw_binary( input_file, dtype="|S4", shape=(5,), use_mmap=use_mmap, offset=offset, order="C", ) name = b"".join(temp).decode("utf8").strip() offset += 5 * 4 # skip 8 bytes offset += 8 # extract local valid indices indx = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(NZ,), use_mmap=use_mmap, offset=offset, ) offset += 4 * NZ indy = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(NZ,), use_mmap=use_mmap, offset=offset, ) offset += 4 * NZ # reduce coordinates to valid points gridx, gridy = np.meshgrid(X, Y) XZ = gridx[indy - 1, indx - 1] YZ = gridy[indy - 1, indx - 1] # skip 4 bytes offset += 4 # save to dictionary local = dict(dims=("i",), coords={}, data_vars={}) local["data_vars"]["y"] = dict(data=YZ.copy(), dims="i") local["data_vars"]["x"] = dict(data=XZ.copy(), dims="i") # read constituents from file for IC in range(NC): # get constituent name field = CONSTITUENTS[IC].decode("utf8").rstrip() local["data_vars"][field] = dict(dims=("i",)) # skip 4 bytes offset += 4 # read elevation for constituent temp = read_raw_binary( input_file, dtype=">f4", shape=(NZ, 2), use_mmap=use_mmap, offset=offset, order="C", ) offset += 4 * 2 * NZ # skip 4 bytes offset += 4 # create local elevation Z = np.zeros((NZ), dtype=np.complex64) # real and imaginary components of elevation Z.real[:] = temp[:, 0] Z.imag[:] = temp[:, 1] # store the data variable local["data_vars"][field]["data"] = Z # convert to xarray Dataset from the data dictionary dtree[name] = xr.Dataset.from_dict(local) dtree[name].attrs["bounds"] = BOUNDS.copy() dtree[name].attrs["lineage"] = pathlib.Path(input_file).name # return xarray dataset (global) and datatree (local) return (ds, dtree)
# PURPOSE: read ATLAS-compressed transport files
[docs] def open_atlas_transport( input_file: str | pathlib.Path, use_mmap: bool = False, **kwargs, ): """ Open ATLAS-compressed tidal transport files Parameters ---------- input_file: str or pathlib.Path Input ATLAS transport file use_mmap: bool, default False Use memory mapping to read data Returns ------- dsu: xarray.Dataset ATLAS global zonal tidal transport data dsv: xarray.Dataset ATLAS global meridional tidal transport data dtu: xarray.DataTree Local ATLAS zonal tidal transport solutions dtv: xarray.DataTree Local ATLAS meridional tidal transport solutions """ # tilde-expand input file input_file = pyTMD.utilities.Path(input_file).resolve() if isinstance(input_file, pathlib.Path) and not input_file.exists(): raise FileNotFoundError(f"File not found: {input_file}") # get file information file_info = input_file.stat() # set initial offset offset = 0 # read data as big endian ll, nx, ny, nc = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(4,), use_mmap=use_mmap, offset=offset, ) offset += 4 * 4 # offset for x and y limits ylim = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(2,), use_mmap=use_mmap, offset=offset, ) offset += 2 * 4 xlim = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(2,), use_mmap=use_mmap, offset=offset, ) offset += 2 * 4 # grid bounding box (xmin, xmax, ymin, ymax) bounds = np.array([*xlim, *ylim], dtype=">f4") # x and y coordinate spacing dx = (xlim[1] - xlim[0]) / nx dy = (ylim[1] - ylim[0]) / ny # create x and y arrays x = np.linspace(xlim[0] + dx / 2.0, xlim[1] - dx / 2.0, nx) y = np.linspace(ylim[0] + dy / 2.0, ylim[1] - dy / 2.0, ny) # read constituents from file constituents = read_raw_binary( input_file, dtype="|S4", shape=(nc,), use_mmap=use_mmap, offset=offset, order="C", ) # add to offset offset += int(nc) * 4 # u and v dictionaries u = dict(dims=("y", "x"), coords={}, data_vars={}) v = dict(dims=("y", "x"), coords={}, data_vars={}) u["coords"]["y"] = dict(data=y.copy(), dims="y") u["coords"]["x"] = dict(data=x - dx / 2.0, dims="x") v["coords"]["y"] = dict(data=y - dy / 2.0, dims="y") v["coords"]["x"] = dict(data=x.copy(), dims="x") # read constituents from file for ic in range(nc): # get constituent name field = constituents[ic].decode("utf8").rstrip() u["data_vars"][field] = dict(dims=("y", "x")) v["data_vars"][field] = dict(dims=("y", "x")) # skip records to constituent offset += 8 # read elevations for constituent temp = read_raw_binary( input_file, dtype=">f4", shape=(ny, nx, 4), use_mmap=use_mmap, offset=offset, order="C", ) # real and imaginary components of transport U = np.ma.array(temp[:, :, 0] + 1j * temp[:, :, 1]) V = np.ma.array(temp[:, :, 2] + 1j * temp[:, :, 3]) # update mask for nan values U.mask = np.isnan(U.data) | (np.abs(U.data) == 0) V.mask = np.isnan(V.data) | (np.abs(V.data) == 0) # replace masked values with fill value U.data[U.mask] = U.fill_value V.data[V.mask] = V.fill_value # store the data u["data_vars"][field]["data"] = U v["data_vars"][field]["data"] = V # skip to next constituent offset += 4 * 4 * nx * ny # convert to xarray Datasets from the data dictionaries dsu = xr.Dataset.from_dict(u) dsv = xr.Dataset.from_dict(v) # add attributes dsu.attrs["bounds"] = bounds.copy() dsv.attrs["bounds"] = bounds.copy() dsu.attrs["lineage"] = pathlib.Path(input_file).name dsv.attrs["lineage"] = pathlib.Path(input_file).name for field in dsu.data_vars: dsu[field].attrs.update(_attributes["u"]["transport"]) for field in dsv.data_vars: dsv[field].attrs.update(_attributes["v"]["transport"]) offset += 4 # read local models nmod = 0 dtu = xr.DataTree() dtv = xr.DataTree() # while the file position is not at the end of file while offset < file_info.st_size: # add 1 to number of models offset += 4 nmod += 1 # get local model dimensions, number of constituents # and number of valid elevation indices NX, NY, NC, NU, NV = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(5,), use_mmap=use_mmap, offset=offset, ) offset += 5 * 4 # extract local x and y limits YLIM = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(2,), use_mmap=use_mmap, offset=offset, ).astype(">f8") offset += 2 * 4 XLIM = read_raw_binary( input_file, dtype=np.dtype(">f4"), shape=(2,), use_mmap=use_mmap, offset=offset, ).astype(">f8") offset += 2 * 4 # local bounding box (xmin, xmax, ymin, ymax) BOUNDS = np.array([*XLIM, *YLIM], dtype=">f4") # x and y coordinate spacing DX = (XLIM[1] - XLIM[0]) / NX DY = (YLIM[1] - YLIM[0]) / NY # x and y coordinate spacing DX = (XLIM[1] - XLIM[0]) / NX DY = (YLIM[1] - YLIM[0]) / NY # create local x and y arrays X = np.linspace(XLIM[0] + DX / 2.0, XLIM[1] - DX / 2.0, NX) Y = np.linspace(YLIM[0] + DY / 2.0, YLIM[1] - DY / 2.0, NY) # extract constituents for localized solution CONSTITUENTS = read_raw_binary( input_file, dtype="|S4", shape=(NC,), use_mmap=use_mmap, offset=offset, order="C", ) offset += NC * 4 # extract region name temp = read_raw_binary( input_file, dtype="|S4", shape=(5,), use_mmap=use_mmap, offset=offset, order="C", ) name = b"".join(temp).decode("utf8").strip() offset += 5 * 4 # skip 8 bytes offset += 8 # extract local valid indices for zonal transports iux = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(NU,), use_mmap=use_mmap, offset=offset, ) offset += 4 * NU iuy = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(NU,), use_mmap=use_mmap, offset=offset, ) offset += 4 * NU # skip 8 bytes offset += 8 # extract local valid indices for meridional transports ivx = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(NV,), use_mmap=use_mmap, offset=offset, ) offset += 4 * NV ivy = read_raw_binary( input_file, dtype=np.dtype(">i4"), shape=(NV,), use_mmap=use_mmap, offset=offset, ) offset += 4 * NV # skip 4 bytes offset += 4 # reduce coordinates to valid points gridx, gridy = np.meshgrid(X, Y) XU = gridx[iuy - 1, iux - 1] - DX / 2.0 YU = gridy[iuy - 1, iux - 1] XV = gridx[ivy - 1, ivx - 1] YV = gridy[ivy - 1, ivx - 1] - DY / 2.0 # u and v dictionaries lclu = dict(dims=("i",), coords={}, data_vars={}) lclv = dict(dims=("i",), coords={}, data_vars={}) lclu["data_vars"]["y"] = dict(data=YU.copy(), dims="i") lclu["data_vars"]["x"] = dict(data=XU.copy(), dims="i") lclv["data_vars"]["y"] = dict(data=YV.copy(), dims="i") lclv["data_vars"]["x"] = dict(data=XV.copy(), dims="i") # read constituents from file for IC in range(NC): # get constituent name field = CONSTITUENTS[IC].decode("utf8").rstrip() # skip 4 bytes offset += 4 # read zonal transport for constituent temp = read_raw_binary( input_file, dtype=">f4", shape=(NU, 2), use_mmap=use_mmap, offset=offset, order="C", ) offset += 4 * 2 * NU # create local zonal transport U = np.ma.zeros((NY, NX), dtype=np.complex64) U.mask = np.ones((NY, NX), dtype=bool) # real and imaginary components of u transport U.data.real[iuy - 1, iux - 1] = temp[:, 0] U.data.imag[iuy - 1, iux - 1] = temp[:, 1] U.mask[iuy - 1, iux - 1] = False # skip 8 bytes offset += 8 # read meridional transport for constituent temp = read_raw_binary( input_file, dtype=">f4", shape=(NV, 2), use_mmap=use_mmap, offset=offset, order="C", ) offset += 4 * 2 * NV # skip 4 bytes offset += 4 # create local meridional transport V = np.ma.zeros((NY, NX), dtype=np.complex64) V.mask = np.ones((NY, NX), dtype=bool) # real and imaginary components of v transport V.data.real[ivy - 1, ivx - 1] = temp[:, 0] V.data.imag[ivy - 1, ivx - 1] = temp[:, 1] V.mask[ivy - 1, ivx - 1] = False # store the data variables lclu["data_vars"][field] = {} lclu["data_vars"][field]["dims"] = ("y", "x") lclu["data_vars"][field]["data"] = U lclv["data_vars"][field] = {} lclv["data_vars"][field]["dims"] = ("y", "x") lclv["data_vars"][field]["data"] = V # convert to xarray Datasets from the data dictionaries dtu[name] = xr.Dataset.from_dict(lclu) dtv[name] = xr.Dataset.from_dict(lclv) dtu[name].attrs["bounds"] = BOUNDS.copy() dtv[name].attrs["bounds"] = BOUNDS.copy() dtu[name].attrs["lineage"] = pathlib.Path(input_file).name dtv[name].attrs["lineage"] = pathlib.Path(input_file).name # return xarray datasets (global) and datatrees (local) return (dsu, dsv, dtu, dtv)
# PURPOSE: read a variable from a raw binary file # with the option to use memory-mapping
[docs] def read_raw_binary( path: str | pathlib.Path, dtype: np.dtype | str, shape: tuple, use_mmap: bool = False, offset: int = 0, order: str = "C", **kwargs, ): """ Read a variable from a raw binary file Parameters ---------- path: str or pathlib.Path Path to input file dtype: numpy.dtype or str Variable data type shape: tuple Shape of the data use_mmap: bool, default False Create a memory-map of the variable offset: int, default 0 Offset to apply on read order: str, default 'C' Memory layout of array Returns ------- var: numpy.ndarray Data variable """ # open the file and read the variable with open(path, mode="rb") as fid: if use_mmap: # use memory-mapping var = np.memmap( fid, dtype=np.dtype(dtype), mode="r", offset=offset, shape=shape, order=order, ) else: # read variable directly count = np.prod(shape) var = np.fromfile( fid, dtype=np.dtype(dtype), offset=offset, count=count ) var = var.reshape(shape, order=order) # verify data shape var.shape = shape return var
# PURPOSE: write a variable to a raw binary file with memory-mapping
[docs] def write_raw_binary( path: str | pathlib.Path, variable: np.ndarray, offset: int = 0, order: str = "C", **kwargs, ): """ Write a variable to a raw binary file with memory-mapping Parameters ---------- path: str or pathlib.Path Path to input file variable: numpy.ndarray Data variable to write offset: int, default 0 Offset to apply on read order: str, default 'C' Memory layout of array kwargs: dict Additional keyword arguments for ``np.memmap`` """ # convert variable to array variable = np.array(variable) # set default keyword arguments kwargs.setdefault("shape", variable.shape) kwargs.setdefault("dtype", variable.dtype) # reshape variable variable = variable.reshape(kwargs["shape"], order=order) # use memory-mapping to write variable var = np.memmap( path, dtype=np.dtype(kwargs["dtype"]), mode="r+", offset=offset, shape=kwargs["shape"], order=order, ) var[:] = variable.astype(kwargs["dtype"]) var.flush()
# PURPOSE: OTIS utilities for xarray Datasets
[docs] @register_dataset_subaccessor("otis") class OTISDataset: """``xarray.Dataset`` utilities for OTIS tidal models""" def __init__(self, ds): # initialize dataset self._ds = ds # PURPOSE: interpolate grid variables to u and v nodes
[docs] def merge(self, ds, group: str = "z"): """ Merge grid and model datasets while interpolating grid variables from zeta nodes to u and v nodes Parameters ---------- ds: xarray.Dataset OTIS tide model data group: str, default 'z' Tidal variable of input dataset - ``'z'``: heights - ``'u'``: zonal currents - ``'U'``: zonal depth-averaged transport - ``'v'``: meridional currents - ``'V'``: meridional depth-averaged transport """ # wrap mask if global mode = "wrap" if self.is_global else "edge" context = f"zeta_to_{group}" # interpolate to u and v nodes if group in ("u", "U"): # calculate Dataset on u grids # pad and roll the mask and bathymetry tmp = self._ds.pad(x=(1, 0), mode=mode).rolling(x=2) mask = tmp.min()["mask"].isel(x=slice(1, None)) bathymetry = tmp.mean()["bathymetry"].isel(x=slice(1, None)) # assign to dataset ds["mask"] = (ds.dims, mask.values) ds["bathymetry"] = ds["mask"] * bathymetry.values for field in ["mask", "bathymetry"]: ds[field].attrs.update(_attributes["u"][field]) # update attributes ds.attrs = combine_attrs([self._ds.attrs, ds.attrs], context) elif group in ("v", "V"): # calculate Dataset on v grids # pad and roll the mask and bathymetry tmp = self._ds.pad(y=(1, 0), mode="edge").rolling(y=2) mask = tmp.min()["mask"].isel(y=slice(1, None)) bathymetry = tmp.mean()["bathymetry"].isel(y=slice(1, None)) # assign to dataset ds["mask"] = (ds.dims, mask.values) ds["bathymetry"] = ds["mask"] * bathymetry.values for field in ["mask", "bathymetry"]: ds[field].attrs.update(_attributes["v"][field]) # update attributes ds.attrs = combine_attrs([self._ds.attrs, ds.attrs], context) else: # merge without interpolation ds = xr.merge( [self._ds, ds], combine_attrs=combine_attrs, compat="override" ) # set coordinate reference system ds.attrs["crs"] = self.crs.to_dict() # return the updated datasets return ds
@property def crs(self): """Coordinate reference system of the ``Dataset``""" # return the CRS of the dataset # default is EPSG:4326 (WGS84) CRS = self._ds.attrs.get("crs", 4326) return pyproj.CRS.from_user_input(CRS) @property def is_global(self) -> bool: """Determine if the dataset covers a global domain""" # grid spacing in x-direction self._dx = self._ds.x[1] - self._ds.x[0] # check if global grid cyclic = np.isclose(self._ds.x[-1] - self._ds.x[0], 360.0 - self._dx) return self.crs.is_geographic and cyclic
# PURPOSE: OTIS utilities for xarray datatrees
[docs] @register_datatree_subaccessor("otis") class OTISDataTree: """``xarray.DataTree`` utilities for OTIS tidal models""" def __init__(self, dtree): # initialize datatree self._dtree = dtree # PURPOSE: output grid file in OTIS format
[docs] def to_grid(self, path: str | pathlib.Path): """ Writes OTIS-format grid files Parameters ---------- path: str or pathlib.Path Output OTIS grid file name """ # tilde-expand output file path = pyTMD.utilities.Path(path).resolve() path.touch() # offset in output file offset = 0 # get c-grid data from elevation dataset ds = self._dtree["z"].to_dataset() # get dimensions nob = len(ds.attrs["iob"]) ny = len(ds["y"]) nx = len(ds["x"]) # record length record_length = 32 # write header to file header = np.array([record_length, nx, ny], dtype=">i4") write_raw_binary(path, header, shape=(3,), dtype=">i4", offset=offset) offset += 4 * 3 # extract bounds and write to file write_raw_binary( path, ds.attrs["bounds"], shape=(4,), dtype=">f4", offset=offset ) offset += 4 * 4 # extract time step and write to file write_raw_binary( path, ds.attrs["dt"], shape=(1,), dtype=">f4", offset=offset ) offset += 4 # write number of open boundaries to file write_raw_binary(path, nob, shape=(1,), dtype=">i4", offset=offset) offset += 4 offset += 8 if nob == 0: offset += 4 else: write_raw_binary( path, ds.attrs["iob"], shape=(nob, 2), dtype=">i4", offset=offset, ) offset += 4 * 2 * nob # write depth to file offset += 8 write_raw_binary( path, ds["bathymetry"].to_numpy(), dtype=">f4", offset=offset ) offset += 4 * nx * ny offset += 4 # write mask to file offset += 4 write_raw_binary( path, ds["mask"].to_numpy(), dtype=">i4", offset=offset ) offset += 4 * nx * ny # end variable write_raw_binary( path, record_length, shape=(1,), dtype=">i4", offset=offset ) offset += 4
# PURPOSE: output elevation file in OTIS format
[docs] def to_elevation(self, path: str | pathlib.Path, **kwargs): """ Writes OTIS-format elevation files Parameters ---------- path: str or pathlib.Path Output OTIS elevation file name """ # tilde-expand output file path = pyTMD.utilities.Path(path).resolve() path.touch() # offset in output file offset = 0 # get z data ds = kwargs.get("z", self._dtree["z"].to_dataset()) # get dimensions ny = len(ds["y"]) nx = len(ds["x"]) nc = len(ds.tmd.constituents) # length of header: allow for 4 character >i c_id strings header_length = 4 * (7 + nc) # write header to file header = np.array([header_length, nx, ny, nc], dtype=">i4") write_raw_binary(path, header, shape=(4,), dtype=">i4", offset=offset) offset += 4 * 4 # extract bounds and write to file write_raw_binary( path, ds.attrs["bounds"], shape=(4,), dtype=">f4", offset=offset ) offset += 4 * 4 # write constituent names to file write_raw_binary( path, ds.tmd.constituents, shape=(nc,), dtype="|S4", offset=offset ) offset += 4 * nc offset += 4 # write each constituent to file for c in ds.tmd.constituents: offset += 4 # merge real and imaginary components of elevation temp = np.zeros((ny, nx, 2), dtype=">f4") temp[:, :, 0] = ds[c].real.values temp[:, :, 1] = ds[c].imag.values write_raw_binary(path, temp, dtype=">f4", offset=offset) offset += 4 * 2 * nx * ny offset += 4 # end variable write_raw_binary( path, header_length, shape=(1,), dtype=">i4", offset=offset ) offset += 4
# PURPOSE: output transport file in OTIS format
[docs] def to_transport(self, path: str | pathlib.Path, **kwargs): """ Writes OTIS-format transport files Parameters ---------- path: str or pathlib.Path Output OTIS elevation file name """ # tilde-expand output file path = pyTMD.utilities.Path(path).resolve() path.touch() # offset in output file offset = 0 # get u and v data dsu = kwargs.get("u", self._dtree["u"].to_dataset()) dsv = kwargs.get("v", self._dtree["v"].to_dataset()) # get dimensions ny = len(dsu["y"]) nx = len(dsu["x"]) nc = len(dsu.tmd.constituents) # length of header: allow for 4 character >i c_id strings header_length = 4 * (7 + nc) # write header to file header = np.array([header_length, nx, ny, nc], dtype=">i4") write_raw_binary(path, header, shape=(4,), dtype=">i4", offset=offset) offset += 4 * 4 # extract bounds and write to file write_raw_binary( path, dsu.attrs["bounds"], shape=(4,), dtype=">f4", offset=offset ) offset += 4 * 4 # write constituent names to file write_raw_binary( path, dsu.tmd.constituents, shape=(nc,), dtype="|S4", offset=offset ) offset += 4 * nc offset += 4 # write each constituent to file for c in dsu.tmd.constituents: offset += 4 # merge real and imaginary components of u and v transports temp = np.zeros((ny, nx, 4), dtype=">f4") temp[:, :, 0] = dsu[c].real.values temp[:, :, 1] = dsu[c].imag.values temp[:, :, 2] = dsv[c].real.values temp[:, :, 3] = dsv[c].imag.values write_raw_binary(path, temp, dtype=">f4", offset=offset) offset += 4 * 4 * nx * ny offset += 4 # end variable write_raw_binary( path, header_length, shape=(1,), dtype=">i4", offset=offset ) offset += 4
# PURPOSE: output elevation file in OTIS format
[docs] def to_mfelevation(self, directory: str | pathlib.Path): """ Writes OTIS-format singular elevation files Parameters ---------- directory: str or pathlib.Path Output directory for OTIS elevation files """ # tilde-expand output directory directory = pyTMD.utilities.Path(directory).resolve() # get z data ds = self._dtree["z"].to_dataset() # write each constituent to file for c in ds.tmd.constituents: path = directory.joinpath(c) self.to_elevation(path, z=ds[[c]])
# PURPOSE: output elevation file in OTIS format
[docs] def to_mftransport(self, directory: str | pathlib.Path): """ Writes OTIS-format singular transport files Parameters ---------- directory: str or pathlib.Path Output directory for OTIS transport files """ # tilde-expand output directory directory = pyTMD.utilities.Path(directory).resolve() # get u and v data dsu = self._dtree["u"].to_dataset() dsv = self._dtree["v"].to_dataset() # write each constituent to file for c in dsu.tmd.constituents: path = directory.joinpath(c) self.to_transport(path, u=dsu[[c]], v=dsv[[c]])
# PURPOSE: ATLAS-compact utilities for xarray Datasets
[docs] @register_dataset_subaccessor("compact") class CompactDataset: """ ``xarray.Dataset`` utilities for ATLAS-compact tidal models """ def __init__(self, ds, spacing: float | list[float] = 1.0 / 30.0): # initialize dataset self._ds = ds self._dx, self._dy = np.broadcast_to(np.atleast_1d(spacing), (2,))
[docs] def refine(self): """Refine data resolution to a finer resolution""" # create coordinate DataArrays x = xr.DataArray(self._x, dims="x") y = xr.DataArray(self._y, dims="y") # interpolate global model to refined grid ds = self._ds.interp(x=x, y=y) ds.attrs["bounds"] = np.array(self.__bounds__) return ds
[docs] def combine_local( self, dtree: xr.DataTree, chunks: int | dict | str | None = None, ): """Combine ATLAS model solutions into a single xarray Dataset Parameters ---------- dtree: xarray.DataTree Local ATLAS model solutions chunks: int, dict, str, or None, default None Coerce output to specified chunks Returns ------- ds: xarray.Dataset ATLAS tide model data """ # create refined dataset ds = self.refine() # for each local model for region, local in dtree.items(): # check if any model longitudes are -180:180 X = local.x.where(local.x >= 0, local.x + 360.0, drop=False) # local indices in global model indx = ((X - ds.x.min()) // self._dx).astype("i") indy = ((local.y - ds.y.min()) // self._dy).astype("i") # for each data variable in the global model for key in ds.data_vars.keys(): # check if data variable is in the local model if key in local.data_vars: # replace global data with local data at valid indices ds[key][indy, indx] = local[key][:] elif key == "mask": # replace global mask with local mask at valid indices ds[key][indy, indx] = True # coerce to specified chunks if chunks is not None: ds = ds.chunk(chunks) # return combined xarray Dataset return ds
@property def shape(self): """Grid dimensions""" nx = np.round(360.0 / self._dx).astype(int) ny = np.round(180.0 / self._dy).astype(int) return np.array([ny, nx]) @property def size(self): """Grid size""" return np.prod(self.shape) @property def _x(self): """Refined x-coordinates""" return np.linspace(self.__xlim__[0], self.__xlim__[1], self.shape[1]) @property def _y(self): """Refined y-coordinates""" return np.linspace(self.__ylim__[0], self.__ylim__[1], self.shape[0]) @property def __xlim__(self): """Limits for x-coordinates""" return [self._dx / 2.0, 360.0 - self._dx / 2.0] @property def __ylim__(self): """Limits for y-coordinates""" return [-90.0 + self._dy / 2.0, 90.0 - self._dy / 2.0] @property def __bounds__(self): """Bounding box for refined grid""" return np.array([*self.__xlim__, *self.__ylim__])