#!/usr/bin/env python
"""
NOAA.py
Written by Tyler Sutterley (04/2026)
Query and parsing functions for NOAA webservices API
PYTHON DEPENDENCIES:
lxml: processing XML and HTML in Python
https://pypi.python.org/pypi/lxml
pandas: Python Data Analysis Library
https://pandas.pydata.org
UPDATE HISTORY:
Updated 04/2026: added builder for XSLT 1.0 stylesheets
allows retrieval of prediction stations coordinates
Updated 01/2026: raise original exception in case of HTTPError
Updated 12/2025: make dataframe accessor inherit from Dataset
Updated 11/2025: add accessor for pandas dataframe objects
added function to reduce prediction stations to active
Updated 08/2025: replace invalid water level values with NaN
convert all station names to title case (some are upper)
Written 07/2025: extracted from Compare NOAA Tides notebook
"""
from __future__ import annotations
import logging
import traceback
import numpy as np
import pyTMD.constituents
import pyTMD.utilities
from pyTMD.io.dataset import Dataset
# attempt imports
pd = pyTMD.utilities.import_dependency("pandas")
pandas_available = pyTMD.utilities.dependency_available("pandas")
__all__ = [
"build_query",
"build_stylesheet",
"from_xml",
"active_stations",
"prediction_stations",
"harmonic_constituents",
"water_level",
"DataFrame",
]
_apis = [
"activestations",
"currentpredictionstations",
"tidepredictionstations",
"harmonicconstituents",
"waterlevelrawonemin",
"waterlevelrawsixmin",
"waterlevelverifiedsixmin",
"waterlevelverifiedhourly",
"waterlevelverifieddaily",
"waterlevelverifiedmonthly",
]
_xpaths = {
"activestations": "//wsdl:station",
"currentpredictionstations": "//wsdl:station",
"tidepredictionstations": "//wsdl:station",
"harmonicconstituents": "//wsdl:item",
"waterlevelrawonemin": "//wsdl:item",
"waterlevelrawsixmin": "//wsdl:item",
"waterlevelverifiedsixmin": "//wsdl:item",
"waterlevelverifiedhourly": "//wsdl:item",
"waterlevelverifieddaily": "//wsdl:item",
"waterlevelverifiedmonthly": "//wsdl:item",
}
[docs]
def build_query(api: str, **kwargs):
"""
Build a query for the NOAA webservices API
Parameters
----------
api: str
NOAA webservices API endpoint to query
kwargs: dict
Additional query parameters to include in the request
Returns
-------
url: str
Complete URL for API request
namespaces: dict
Namespaces for parsing ``XML`` responses
"""
# NOAA webservices hosts
HOST = "https://tidesandcurrents.noaa.gov/axis/webservices"
OPENDAP = "https://opendap.co-ops.nos.noaa.gov/axis/webservices"
# NOAA webservices query arguments
arguments = "?format=xml"
for key, value in kwargs.items():
arguments += f"&{key}={value}"
arguments += "&Submit=Submit"
# NOAA API query url
url = f"{HOST}/{api}/response.jsp{arguments}"
# lxml namespaces for parsing
namespaces = {}
namespaces["wsdl"] = f"{OPENDAP}/{api}/wsdl"
return (url, namespaces)
[docs]
def build_stylesheet(
namespaces: dict,
key: str = "wsdl",
**kwargs,
):
"""
Build an XSLT stylesheet to flatten NOAA webservices API responses
Parameters
----------
namespaces: dict
Namespaces for parsing ``XML`` responses
key: str, default "wsdl"
Key for namespace to use in stylesheet
Returns
-------
stylesheet: StringIO
file-like object for XSLT stylesheet
"""
from io import StringIO
# XSLT namespace and stylesheet template
xsl = "http://www.w3.org/1999/XSL/Transform"
# lxml only supports XSLT 1.0: cannot use xpath-default-namespace
namespace = namespaces[key]
# build output stylesheet
stylesheet = f"""
<xsl:stylesheet version="1.0" xmlns:xsl="{xsl}" xmlns:{key}="{namespace}">
<xsl:output method="xml" omit-xml-declaration="yes" indent="yes"/>
<xsl:template match="{key}:metadata">
<xsl:copy-of select="{key}:location/*"/>
<xsl:copy-of select="{key}:date_established"/>
</xsl:template>
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
"""
return StringIO(stylesheet)
[docs]
def from_xml(url: str, **kwargs):
"""
Query the NOAA webservices API and return as a ``DataFrame``
Parameters
----------
url: str
The complete URL for the API request
kwargs: dict
Additional keyword arguments to pass to ``pandas.read_xml``
Returns
-------
df: pandas.DataFrame
Data from NOAA webservices
"""
# query the NOAA webservices API
assert pandas_available, "pandas is required for accessing NOAA webservices"
try:
logging.debug(url)
df = pd.read_xml(url, **kwargs)
except ValueError as exc:
logging.error(traceback.format_exc())
except pyTMD.utilities.urllib2.HTTPError as exc:
logging.error(traceback.format_exc())
exc.msg = "Error querying NOAA webservices API"
raise
else:
# return the dataframe
return df
[docs]
def active_stations(
api: str = "activestations",
**kwargs,
):
"""
Retrieve a list of active tide stations
Parameters
----------
api: str
NOAA webservices API endpoint to query
kwargs: dict
Additional query parameters to include in the request
Returns
-------
df: pandas.DataFrame
Table of currently active stations
"""
# get list of active tide stations
xpath = _xpaths[api]
url, namespaces = build_query(api, **kwargs)
stylesheet = build_stylesheet(namespaces)
df = from_xml(
url,
xpath=xpath,
namespaces=namespaces,
stylesheet=stylesheet,
)
# rename columns for consistency
df = df.rename(columns={"name": "ID", "ID": "name"})
# convert station names to title case
df["name"] = df["name"].str.title()
# convert station IDs to strings
df["ID"] = df["ID"].astype(str)
# set the index to the station name
df = df.set_index("name")
# sort the index and drop parameter column
df = df.sort_index().drop(columns=["parameter"], errors="ignore")
# return the dataframe
return df
[docs]
def prediction_stations(
api: str = "tidepredictionstations",
active_only: bool = True,
**kwargs,
):
"""
Retrieve a list of tide prediction stations
Parameters
----------
api: str
NOAA webservices API endpoint to query
active_only: bool, default True
Reduce list to active stations only
kwargs: dict
Additional query parameters to include in the request
Returns
-------
df: pandas.DataFrame
Table of tide prediction stations
"""
# get list of tide prediction stations
xpath = _xpaths[api]
url, namespaces = build_query(api, **kwargs)
stylesheet = build_stylesheet(namespaces)
df = from_xml(
url,
xpath=xpath,
namespaces=namespaces,
stylesheet=stylesheet,
)
# convert station names to title case
df["name"] = df["name"].str.title()
# convert station IDs to strings
df["ID"] = df["ID"].astype(str)
# set the index to the station name
df = df.set_index("name")
# sort the index and drop metadata column
df = df.sort_index().drop(columns=["metadata"], errors="ignore")
# reduce list to active stations only
if active_only:
df = df[df.ID.isin(active_stations().ID)]
# return the dataframe
return df
[docs]
def harmonic_constituents(
api: str = "harmonicconstituents",
**kwargs,
):
"""
Retrieve a list of harmonic constituents for a specified station
Parameters
----------
api: str
NOAA webservices API endpoint to query
kwargs: dict
Additional query parameters to include in the request
Returns
-------
df: pandas.DataFrame
Table of tide constituents
"""
# set default query parameters
kwargs.setdefault("unit", 0)
kwargs.setdefault("timeZone", 0)
# get list of harmonic constituents
xpath = _xpaths[api]
url, namespaces = build_query(api, **kwargs)
df = from_xml(url, xpath=xpath, namespaces=namespaces)
# set the index to the constituent number
df = df.set_index("constNum")
# parse harmonic constituents
df["constituent"] = df["name"].apply(pyTMD.constituents._parse_name)
# return the dataframe
return df
[docs]
def water_level(
api: str = "waterlevelrawsixmin",
**kwargs,
):
"""
Retrieve water level data for a specified station and date range
Parameters
----------
api: str
NOAA webservices API endpoint to query
kwargs: dict
Additional query parameters to include in the request
Returns
-------
df: pandas.DataFrame
Table of water level data
"""
# set default query parameters
kwargs.setdefault("unit", 0)
kwargs.setdefault("timeZone", 0)
kwargs.setdefault("datum", "MSL")
# get water levels for station and date range
xpath = _xpaths[api]
url, namespaces = build_query(api, **kwargs)
df = from_xml(
url, xpath=xpath, namespaces=namespaces, parse_dates=["timeStamp"]
)
# replace invalid water level values with NaN
df = df.replace(to_replace=[-999], value=np.nan)
# return the dataframe
return df
[docs]
@pd.api.extensions.register_dataframe_accessor("tmd")
class DataFrame(Dataset):
"""Accessor for extending an ``pandas.DataFrame`` for tide models"""
def __init__(self, df):
# store the pandas dataframe
self._df = df
# convert to xarray Dataset
ds = self.to_dataset()
# initialize the parent class
super().__init__(ds)
[docs]
def to_dataset(self):
"""Convert NOAA constituent ``Dataframe`` to an ``xarray.Dataset``
Returns
-------
ds: xarray.Dataset
Tide constituent ``Dataset``
"""
# complex constituent oscillation(s)
hc = self._df.amplitude * np.exp(-1j * np.radians(self._df.phase))
# convert data series to xarray DataArray
darr = hc.to_xarray().rename({"constNum": "constituent"})
# assign constituent names as coordinates
darr = darr.assign_coords({"constituent": self._df.constituent.values})
# convert DataArray to Dataset with constituents as variables
ds = darr.to_dataset(dim="constituent")
return ds