#!/usr/bin/env python
"""
GOT.py
Written by Tyler Sutterley (04/2026)
Reads ascii and netCDF4 files from Richard Ray's Goddard Ocean Tide (GOT) model
https://earth.gsfc.nasa.gov/geo/data/ocean-tide-models
PYTHON DEPENDENCIES:
numpy: Scientific Computing Tools For Python
https://numpy.org
https://numpy.org/doc/stable/user/numpy-for-matlab-users.html
xarray: N-D labeled arrays and datasets in Python
https://docs.xarray.dev/en/stable/
UPDATE HISTORY:
Updated 04/2026: added lineage attributes to save model filename(s)
Updated 03/2026: use numpy functions to convert from degrees to radians
Updated 02/2026: make dataset accessor for GOT be a subaccessor from dataset
some models have units in the second line of the header text
Updated 12/2025: no longer subclassing pathlib.Path for working directories
added function to write to output GOT-formatted ascii files
fixed writing of output constituents to match GOT attribute format
Updated 11/2025: near-complete rewrite of program to use xarray
Updated 10/2025: simplify ascii read function to use masked_equal
Updated 08/2025: use numpy degree to radian conversions
added option to gap fill when reading constituent grids
Updated 11/2024: expose buffer distance for cropping tide model data
Updated 10/2024: fix error when using default bounds in extract_constants
Updated 07/2024: added crop and bounds keywords for trimming model data
use parse function from constituents class to extract names
Updated 04/2023: fix repeated longitudinal convention adjustment
using pathlib to define and expand tide model paths
Updated 03/2023: add basic variable typing to function inputs
Updated 12/2022: refactor tide read programs under io
new functions to read and interpolate from constituents class
new functions to read and write GOT netCDF4 files
refactored interpolation routines into new module
Updated 11/2022: use f-strings for formatting verbose or ascii output
Updated 05/2022: reformat arguments to extract_GOT_constants definition
changed keyword arguments to camel case
Updated 04/2022: updated docstrings to numpy documentation format
include utf-8 encoding in reads to be windows compliant
Updated 12/2021: adjust longitude convention based on model longitude
Updated 07/2021: added check that tide model files are accessible
Updated 06/2021: add warning for tide models being entered as string
Updated 05/2021: added option for extrapolation cutoff in kilometers
Updated 03/2021: add extrapolation check where there are no invalid points
prevent ComplexWarning for fill values when calculating amplitudes
simplified inputs to be similar to binary OTIS read program
replaced numpy bool/int to prevent deprecation warnings
Updated 02/2021: set invalid values to nan in extrapolation
replaced numpy bool to prevent deprecation warning
Updated 12/2020: added valid data extrapolation with nearest_extrap
Updated 09/2020: set bounds error to false for regular grid interpolations
adjust dimensions of input coordinates to be iterable
Updated 08/2020: replaced griddata with scipy regular grid interpolators
Updated 07/2020: added function docstrings. separate bilinear interpolation
update griddata interpolation. add option for compression
Updated 06/2020: use argmin and argmax in bilinear interpolation
Updated 11/2019: find invalid mask points for each constituent
Updated 09/2019: output as numpy masked arrays instead of nan-filled arrays
Updated 07/2019: interpolate fill value mask with bivariate splines
Updated 12/2018: python3 compatibility updates for division and zip
Updated 10/2018: added scale as load tides are in mm and ocean are in cm
Updated 08/2018: added multivariate spline interpolation option
Written 07/2018
"""
from __future__ import division, annotations
import re
import gzip
import pathlib
import datetime
import numpy as np
import xarray as xr
import pyTMD.version
import pyTMD.constituents
import pyTMD.utilities
from .dataset import combine_attrs, register_dataset_subaccessor
# attempt imports
dask = pyTMD.utilities.import_dependency("dask")
dask_available = pyTMD.utilities.dependency_available("dask")
__all__ = [
"open_mfdataset",
"open_got_dataset",
"open_got_ascii",
"open_got_netcdf",
"GOTDataset",
]
# PURPOSE: read a list of GOT ASCII or netCDF4 files
[docs]
def open_mfdataset(
model_files: list[str] | list[pathlib.Path],
parallel: bool = False,
**kwargs,
):
"""
Open multiple GOT model files
Parameters
----------
model_files: list of str or pathlib.Path
List of OTIS model files
parallel: bool, default False
Open files in parallel using ``dask.delayed``
kwargs: dict
Additional keyword arguments for opening GOT files
Returns
-------
ds: xarray.Dataset
GOT tide model data
"""
# merge multiple granules
if parallel and dask_available:
opener = dask.delayed(open_got_dataset)
else:
opener = open_got_dataset
# read each file as xarray dataset and append to list
d = [opener(f, **kwargs) for f in model_files]
# read datasets as dask arrays
if parallel and dask_available:
(d,) = dask.compute(d)
# merge datasets
ds = xr.merge(d, combine_attrs=combine_attrs, compat="override")
# return xarray dataset
return ds
# PURPOSE: reads a GOT ASCII or netCDF4 file
[docs]
def open_got_dataset(
input_file: str | pathlib.Path,
**kwargs,
):
"""
Open GOT-formatted model files
Parameters
----------
input_file: str or pathlib.Path
Input transport file
format: str, default 'netcdf'
Model format
- ``'ascii'``: traditional GOT ASCII format
- ``'netcdf'``: GOT netCDF4 format
kwargs: dict
Additional keyword arguments for opening GOT files
Returns
-------
ds: xarray.Dataset
GOT tide model data
"""
# detect file format if not provided
if kwargs.get("format", None) is None:
kwargs["format"] = pyTMD.utilities.detect_format(input_file)
# detect if file is compressed if not provided
if kwargs.get("compressed", None) is None:
kwargs["compressed"] = pyTMD.utilities.detect_compression(input_file)
# read constituent from file
if kwargs["format"] == "ascii":
# GOT ascii constituent files
ds = open_got_ascii(input_file, **kwargs)
elif kwargs["format"] == "netcdf":
# GOT netCDF4 constituent files
ds = open_got_netcdf(input_file, **kwargs)
else:
raise ValueError(f"Unrecognized file format: {kwargs['format']}")
# return dataset
return ds
# PURPOSE: read GOT ASCII files
[docs]
def open_got_ascii(
input_file: str | pathlib.Path,
chunks: int | dict | str | None = None,
**kwargs,
):
"""
Open GOT-formatted ASCII files
Parameters
----------
input_file: str or pathlib.Path
Model file
chunks: int, dict, str, or None, default None
Coerce output to specified chunks
compressed: bool, default False
Input file is ``gzip`` compressed
Returns
-------
ds: xarray.Dataset
GOT tide model data
"""
# set default keyword arguments
kwargs.setdefault("compressed", False)
# tilde-expand input file
input_file = pyTMD.utilities.Path(input_file).resolve()
if isinstance(input_file, pathlib.Path) and not input_file.exists():
raise FileNotFoundError(f"File not found: {input_file}")
# read the ASCII-format file
if kwargs["compressed"]:
# read gzipped ascii file
with gzip.open(input_file, "rb") as f:
file_contents = f.read().decode("utf8").splitlines()
else:
with open(input_file, mode="r", encoding="utf8") as f:
file_contents = f.read().splitlines()
# parse header text
# constituent identifier
cons = pyTMD.constituents._parse_name(file_contents[0])
# get units from header if available
rx = re.compile(r"\((\w+m)\)", re.IGNORECASE)
# GOT headers from Richard Ray have units on the first line
# some other models have units on the second line
if rx.search(file_contents[0]):
units = rx.findall(file_contents[0], re.IGNORECASE)
elif rx.search(file_contents[1]):
units = rx.findall(file_contents[1], re.IGNORECASE)
else:
units = None
# grid dimensions
nlat, nlon = np.array(file_contents[2].split(), dtype=int)
# longitude range
ilat = np.array(file_contents[3].split(), dtype=np.float64)
# latitude range
ilon = np.array(file_contents[4].split(), dtype=np.float64)
# mask fill value
masked_values = file_contents[5].split()
fill_value = np.array(masked_values[0], dtype=np.float32)
# number of columns in ascii file
ncol = 11
# create latitude and longitude arrays
lat = np.linspace(ilat[0], ilat[1], nlat)
lon = np.linspace(ilon[0], ilon[1], nlon)
# data dictionary
var = dict(dims=("y", "x"), coords={}, data_vars={})
var["coords"]["y"] = dict(data=lat.copy(), dims="y")
var["coords"]["x"] = dict(data=lon.copy(), dims="x")
# input amplitude and phase
amp = np.zeros((nlat, nlon), dtype=np.float32)
ph = np.zeros((nlat, nlon), dtype=np.float32)
# starting lines to fill amplitude and phase variables
l1 = 7
l2 = 14 + int(nlon // ncol) * nlat + nlat
# for each latitude
for i in range(nlat):
for j in range(nlon // ncol):
j1 = j * ncol
# amplitude and phase are on two separate rows
amp[i, j1 : j1 + ncol] = np.array(file_contents[l1].split())
ph[i, j1 : j1 + ncol] = np.array(file_contents[l2].split())
l1 += 1
l2 += 1
# add last row of tidal variables
j1 = (j + 1) * ncol
j2 = nlon % ncol
# amplitude and phase are on two separate rows
amp[i, j1 : j1 + j2] = np.array(file_contents[l1].split())
ph[i, j1 : j1 + j2] = np.array(file_contents[l2].split())
l1 += 1
l2 += 1
# convert to masked arrays
amp = np.ma.masked_equal(amp, fill_value)
ph = np.ma.masked_equal(ph, fill_value)
# store the data variables
var["data_vars"][cons] = {}
var["data_vars"][cons]["dims"] = ("y", "x")
var["data_vars"][cons]["data"] = amp * np.exp(-1j * np.radians(ph))
# convert to xarray Dataset from the data dictionary
ds = xr.Dataset.from_dict(var)
# coerce to specified chunks
if chunks is not None:
ds = ds.chunk(chunks)
# add attributes
ds.attrs["group"] = "z"
ds.attrs["lineage"] = pathlib.Path(input_file).name
if units:
ds[cons].attrs["units"] = units[0].lower()
# return xarray dataset
return ds
# PURPOSE: read GOT netCDF4 files
[docs]
def open_got_netcdf(
input_file: str | pathlib.Path,
chunks: int | dict | str | None = None,
**kwargs,
):
"""
Open GOT-formatted netCDF4 files
Parameters
----------
input_file: str or pathlib.Path
Model file
chunks: int, dict, str, or None, default None
Variable chunk sizes for dask (see ``xarray.open_dataset``)
compressed: bool, default False
Input file is ``gzip`` compressed
Returns
-------
ds: xarray.Dataset
GOT tide model data
"""
# set default keyword arguments
kwargs.setdefault("compressed", False)
# tilde-expand input file
input_file = pyTMD.utilities.Path(input_file).resolve()
if isinstance(input_file, pathlib.Path) and not input_file.exists():
raise FileNotFoundError(f"File not found: {input_file}")
# read the netCDF4-format file
if kwargs["compressed"]:
# read gzipped netCDF4 file
f = gzip.open(input_file, "rb")
tmp = xr.open_dataset(
f, mask_and_scale=True, decode_coords=False, chunks=chunks
)
else:
tmp = xr.open_dataset(
input_file, mask_and_scale=True, decode_coords=False, chunks=chunks
)
# extract constituent from attribute
cons = pyTMD.constituents._parse_name(tmp.attrs["Constituent"])
# create output xarray dataset for file
ds = xr.Dataset()
# assign coordinates
ds.coords["x"] = tmp.longitude
ds.coords["y"] = tmp.latitude
# calculate complex form of constituent oscillation
ds[cons] = tmp.amplitude * np.exp(-1j * np.radians(tmp.phase))
# rename dimensions
mapping_coords = dict(lon="x", lat="y")
ds = ds.rename(mapping_coords)
# add attributes
ds.attrs["group"] = "z"
ds.attrs["lineage"] = pathlib.Path(input_file).name
ds[cons].attrs["units"] = tmp["amplitude"].attrs.get("units")
# return xarray dataset
return ds
# PURPOSE: GOT utilities for xarray Datasets
[docs]
@register_dataset_subaccessor("got")
class GOTDataset:
"""``xarray.Dataset`` utilities for GOT tidal models"""
def __init__(self, ds):
self._ds = ds
[docs]
def to_ascii(
self,
path: str | pathlib.Path,
fill_value: float = 999.0,
mode: str = "w",
**kwargs,
):
"""
Writes tidal constituents to ASCII files in GOT format
Parameters
----------
path: str | pathlib.Path
Output directory for ASCII files
fill_value: float, default -999.0
Fill value for missing data
mode: str, default 'w'
File mode
kwargs: dict
Additional keyword arguments for ASCII writer
"""
# tilde-expand output path
path = pyTMD.utilities.Path(path).resolve()
# for each variable
for v in self._ds.data_vars.keys():
# output dataset
ds = xr.Dataset()
# calculate amplitude and phase with fill values
ds["amplitude"] = self._ds[v].tmd.amplitude.fillna(fill_value)
ds["phase"] = self._ds[v].tmd.phase.fillna(fill_value)
# get min and max of coordinates
nlat, nlon = self._ds[v].shape
ymin, ymax = ds["y"].values.min(), ds["y"].values.max()
xmin, xmax = ds["x"].values.min(), ds["x"].values.max()
# number of columns in ascii file
ncol = 11
# write GOT ASCII file
FILE = path.joinpath(f"{v}.d")
with open(FILE, mode=mode, encoding="utf8") as f:
# write header information for amplitude
units = ds["amplitude"].attrs.get("units", "")
f.write(f"{v.upper()} tide amplitude ({units})\n")
f.write(f"Generated by pyTMD {pyTMD.version.full_version}\n")
f.write(f"{nlat:20d} {nlon:20d}\n")
f.write(f"{ymin:20.4f} {ymax:20.4f}\n")
f.write(f"{xmin:20.4f} {xmax:20.4f}\n")
f.write(f"{fill_value:20.2f} {fill_value:20.2f}\n")
f.write("\n")
# write amplitude data
for i in range(nlat):
amp = ds["amplitude"].isel(y=i).values
fmt = [f"{v:7.2f}" for v in amp]
for j in range(nlon // ncol + 1):
j1 = j * ncol
f.write("".join(fmt[j1 : j1 + ncol]) + "\n")
# write header information for phase
units = ds["phase"].attrs.get("units", "")
f.write(f"{v.upper()} tide phase lags ({units})\n")
f.write(f"Generated by pyTMD {pyTMD.version.full_version}\n")
f.write(f"{nlat:20d} {nlon:20d}\n")
f.write(f"{ymin:20.4f} {ymax:20.4f}\n")
f.write(f"{xmin:20.4f} {xmax:20.4f}\n")
f.write(f"{fill_value:20.2f} {fill_value:20.2f}\n")
f.write("\n")
# write phase data
for i in range(nlat):
ph = ds["phase"].isel(y=i).values
fmt = [f"{v:7.2f}" for v in ph]
for j in range(nlon // ncol + 1):
j1 = j * ncol
f.write("".join(fmt[j1 : j1 + ncol]) + "\n")
# PURPOSE: output tidal constituent file in GOT netCDF format
[docs]
def to_netcdf(
self,
path: str | pathlib.Path,
mode: str = "w",
encoding: dict = {"zlib": True, "complevel": 9},
**kwargs,
):
"""
Writes tidal constituents to netCDF4 files in GOT format
Parameters
----------
path: str | pathlib.Path
Output directory for netCDF4 files
mode: str, default 'w'
netCDF4 file mode
encoding: dict, default {"zlib": True, "complevel": 9}
netCDF4 variable compression settings
kwargs: dict
Additional keyword arguments for ``xarray`` netCDF4 writer
"""
# tilde-expand output path
path = pyTMD.utilities.Path(path).resolve()
# set default encoding
kwargs.setdefault("encoding", dict(amplitude=encoding, phase=encoding))
# coordinate remapping
mapping_coords = dict(x="longitude", y="latitude")
attrs = dict(longitude={}, latitude={})
attrs["longitude"] = dict(units="degrees_east", long_name="longitude")
attrs["latitude"] = dict(units="degrees_north", long_name="latitude")
# get longitude and latitude arrays
# for each variable
for v in self._ds.data_vars.keys():
ds = xr.Dataset()
# calculate amplitude and phase
ds["amplitude"] = self._ds[v].tmd.amplitude
ds["phase"] = self._ds[v].tmd.phase
ds["amplitude"].attrs["units"] = self._ds[v].attrs.get("units", "")
ds["phase"].attrs["units"] = "degrees"
ds["amplitude"].attrs["long_name"] = "Tide amplitude"
ds["phase"].attrs["long_name"] = "Greenwich tide phase lag"
# rename dimensions
ds = ds.swap_dims(dict(x="lon", y="lat"))
# remap coordinates to GOT convention
ds = ds.rename(mapping_coords)
# update variable attributes
for att_name, att_val in attrs.items():
ds[att_name].attrs.update(att_val)
# add global attributes
ds.attrs["title"] = "GOT tidal constituent data"
ds.attrs["authors"] = "Richard Ray"
ds.attrs["institution"] = "NASA Goddard Space Flight Center"
# define and fill constituent ID
ds.attrs["Constituent"] = v.upper().encode("utf8")
ds.attrs["date_created"] = datetime.datetime.now().isoformat()
ds.attrs["software_reference"] = pyTMD.version.project_name
ds.attrs["software_version"] = pyTMD.version.full_version
# write GOT netCDF4 file
FILE = path.joinpath(f"{v}.nc")
ds.to_netcdf(FILE, mode=mode, **kwargs)