Source code for pyTMD.io.NOAA

#!/usr/bin/env python
"""
NOAA.py
Written by Tyler Sutterley (04/2026)
Query and parsing functions for NOAA webservices API

PYTHON DEPENDENCIES:
    lxml: processing XML and HTML in Python
        https://pypi.python.org/pypi/lxml
    pandas: Python Data Analysis Library
        https://pandas.pydata.org

UPDATE HISTORY:
    Updated 04/2026: added builder for XSLT 1.0 stylesheets
        allows retrieval of prediction station coordinates
    Updated 01/2026: raise original exception in case of HTTPError
    Updated 12/2025: make dataframe accessor inherit from Dataset
    Updated 11/2025: add accessor for pandas dataframe objects
        added function to reduce prediction stations to active
    Updated 08/2025: replace invalid water level values with NaN
        convert all station names to title case (some are upper)
    Written 07/2025: extracted from Compare NOAA Tides notebook
"""

from __future__ import annotations

import logging
import traceback
import numpy as np
import pyTMD.constituents
import pyTMD.utilities
from pyTMD.io.dataset import Dataset

# attempt imports
pd = pyTMD.utilities.import_dependency("pandas")
pandas_available = pyTMD.utilities.dependency_available("pandas")

# public names exported by this module
__all__ = [
    "build_query",
    "build_stylesheet",
    "from_xml",
    "active_stations",
    "prediction_stations",
    "harmonic_constituents",
    "water_level",
    "DataFrame",
]

_apis = [
    "activestations",
    "currentpredictionstations",
    "tidepredictionstations",
    "harmonicconstituents",
    "waterlevelrawonemin",
    "waterlevelrawsixmin",
    "waterlevelverifiedsixmin",
    "waterlevelverifiedhourly",
    "waterlevelverifieddaily",
    "waterlevelverifiedmonthly",
]

_xpaths = {
    "activestations": "//wsdl:station",
    "currentpredictionstations": "//wsdl:station",
    "tidepredictionstations": "//wsdl:station",
    "harmonicconstituents": "//wsdl:item",
    "waterlevelrawonemin": "//wsdl:item",
    "waterlevelrawsixmin": "//wsdl:item",
    "waterlevelverifiedsixmin": "//wsdl:item",
    "waterlevelverifiedhourly": "//wsdl:item",
    "waterlevelverifieddaily": "//wsdl:item",
    "waterlevelverifiedmonthly": "//wsdl:item",
}


def build_query(api: str, **kwargs) -> tuple[str, dict[str, str]]:
    """
    Build a query for the NOAA webservices API

    Parameters
    ----------
    api: str
        NOAA webservices API endpoint to query
    kwargs: dict
        Additional query parameters to include in the request

        Keys and values are converted to strings and percent-encoded,
        so they should be passed unencoded

    Returns
    -------
    url: str
        Complete URL for API request
    namespaces: dict
        Namespaces for parsing ``XML`` responses
    """
    from urllib.parse import quote, urlencode

    # NOAA webservices hosts
    HOST = "https://tidesandcurrents.noaa.gov/axis/webservices"
    OPENDAP = "https://opendap.co-ops.nos.noaa.gov/axis/webservices"
    # NOAA webservices query arguments
    # (a list of pairs keeps the argument order and any repeated key)
    parameters = [("format", "xml"), *kwargs.items(), ("Submit", "Submit")]
    # percent-encode keys and values so that spaces or reserved characters
    # (such as "&" or "=") in a value cannot produce a malformed URL
    arguments = urlencode(parameters, safe=":,/", quote_via=quote)
    # NOAA API query url
    url = f"{HOST}/{api}/response.jsp?{arguments}"
    # lxml namespaces for parsing
    namespaces = {"wsdl": f"{OPENDAP}/{api}/wsdl"}
    return (url, namespaces)


def build_stylesheet(
    namespaces: dict,
    key: str = "wsdl",
    **kwargs,
):
    """
    Build an XSLT stylesheet to flatten NOAA webservices API responses

    The stylesheet replaces each ``metadata`` element with copies of the
    children of its ``location`` element and of its ``date_established``
    element, and copies every other node and attribute unchanged

    Parameters
    ----------
    namespaces: dict
        Namespaces for parsing ``XML`` responses
    key: str, default "wsdl"
        Key for namespace to use in stylesheet
    kwargs: dict
        Accepted but not used by this function

    Returns
    -------
    stylesheet: StringIO
        file-like object for XSLT stylesheet
    """
    import io

    # lxml only supports XSLT 1.0: cannot use xpath-default-namespace
    # so the response namespace is bound to an explicit prefix
    prefix_uri = namespaces[key]
    # namespace of the XSLT instructions themselves
    xslt_uri = "http://www.w3.org/1999/XSL/Transform"
    # fill in the stylesheet template
    text = f"""
    <xsl:stylesheet version="1.0"
        xmlns:xsl="{xslt_uri}"
        xmlns:{key}="{prefix_uri}">
        <xsl:output method="xml" omit-xml-declaration="yes" indent="yes"/>
        <xsl:template match="{key}:metadata">
            <xsl:copy-of select="{key}:location/*"/>
            <xsl:copy-of select="{key}:date_established"/>
        </xsl:template>
        <xsl:template match="@*|node()">
            <xsl:copy>
                <xsl:apply-templates select="@*|node()"/>
            </xsl:copy>
        </xsl:template>
    </xsl:stylesheet>
    """
    return io.StringIO(text)


def from_xml(url: str, **kwargs):
    """
    Query the NOAA webservices API and return as a ``DataFrame``

    Parameters
    ----------
    url: str
        The complete URL for the API request
    kwargs: dict
        Additional keyword arguments to pass to ``pandas.read_xml``

    Returns
    -------
    df: pandas.DataFrame or None
        Data from NOAA webservices

        ``None`` is returned if a ``ValueError`` is raised while
        reading the response (the traceback is logged as an error)

    Raises
    ------
    AssertionError
        If ``pandas`` is not available
    HTTPError
        If the request fails: the original exception is re-raised
        after its ``msg`` attribute is replaced
    """
    # pandas is an optional dependency of the package
    assert pandas_available, "pandas is required for accessing NOAA webservices"
    # query the NOAA webservices API
    try:
        logging.debug(url)
        table = pd.read_xml(url, **kwargs)
    except ValueError:
        # log the failure: callers receive None in this case
        logging.error(traceback.format_exc())
        return None
    except pyTMD.utilities.urllib2.HTTPError as exc:
        logging.error(traceback.format_exc())
        # keep the original exception but with a descriptive message
        exc.msg = "Error querying NOAA webservices API"
        raise
    # return the dataframe
    return table


def active_stations(
    api: str = "activestations",
    **kwargs,
):
    """
    Retrieve a list of active tide stations

    Parameters
    ----------
    api: str
        NOAA webservices API endpoint to query
    kwargs: dict
        Additional query parameters to include in the request

    Returns
    -------
    df: pandas.DataFrame
        Table of currently active stations, indexed by station name
        and sorted by that index
    """
    # build the request and the stylesheet for flattening the response
    url, namespaces = build_query(api, **kwargs)
    # get list of active tide stations
    stations = from_xml(
        url,
        xpath=_xpaths[api],
        namespaces=namespaces,
        stylesheet=build_stylesheet(namespaces),
    )
    # swap the "name" and "ID" column labels for consistency
    stations = stations.rename(columns={"name": "ID", "ID": "name"})
    # convert station IDs to strings
    stations["ID"] = stations["ID"].astype(str)
    # convert station names to title case
    stations["name"] = stations["name"].str.title()
    # index by station name, sort the index and
    # drop the parameter column (if present)
    return (
        stations.set_index("name")
        .sort_index()
        .drop(columns=["parameter"], errors="ignore")
    )


def prediction_stations(
    api: str = "tidepredictionstations",
    active_only: bool = True,
    **kwargs,
):
    """
    Retrieve a list of tide prediction stations

    Parameters
    ----------
    api: str
        NOAA webservices API endpoint to query
    active_only: bool, default True
        Reduce list to active stations only

        Requires an additional query for the active stations
    kwargs: dict
        Additional query parameters to include in the request

    Returns
    -------
    df: pandas.DataFrame
        Table of tide prediction stations, indexed by station name
        and sorted by that index
    """
    # build the request and the stylesheet for flattening the response
    url, namespaces = build_query(api, **kwargs)
    # get list of tide prediction stations
    stations = from_xml(
        url,
        xpath=_xpaths[api],
        namespaces=namespaces,
        stylesheet=build_stylesheet(namespaces),
    )
    # convert station IDs to strings
    stations["ID"] = stations["ID"].astype(str)
    # convert station names to title case
    stations["name"] = stations["name"].str.title()
    # index by station name, sort the index and
    # drop the metadata column (if present)
    stations = (
        stations.set_index("name")
        .sort_index()
        .drop(columns=["metadata"], errors="ignore")
    )
    # return the complete table if not reducing
    if not active_only:
        return stations
    # reduce list to stations with an ID in the active stations table
    is_active = stations.ID.isin(active_stations().ID)
    return stations[is_active]


def harmonic_constituents(
    api: str = "harmonicconstituents",
    **kwargs,
):
    """
    Retrieve a list of harmonic constituents for a specified station

    Parameters
    ----------
    api: str
        NOAA webservices API endpoint to query
    kwargs: dict
        Additional query parameters to include in the request

        ``unit`` and ``timeZone`` are set to 0 if not provided

    Returns
    -------
    df: pandas.DataFrame
        Table of tide constituents, indexed by ``constNum`` and with
        an added ``constituent`` column of parsed constituent names
    """
    # set default query parameters
    # (setdefault appends missing keys after the ones from the caller)
    for parameter in ("unit", "timeZone"):
        kwargs.setdefault(parameter, 0)
    # get list of harmonic constituents
    url, namespaces = build_query(api, **kwargs)
    table = from_xml(url, xpath=_xpaths[api], namespaces=namespaces)
    # set the index to the constituent number
    table = table.set_index("constNum")
    # parse harmonic constituents
    parsed = table["name"].apply(pyTMD.constituents._parse_name)
    table["constituent"] = parsed
    # return the dataframe
    return table


def water_level(
    api: str = "waterlevelrawsixmin",
    **kwargs,
):
    """
    Retrieve water level data for a specified station and date range

    Parameters
    ----------
    api: str
        NOAA webservices API endpoint to query
    kwargs: dict
        Additional query parameters to include in the request

        If not provided, ``unit`` and ``timeZone`` are set to 0
        and ``datum`` is set to ``"MSL"``

    Returns
    -------
    df: pandas.DataFrame
        Table of water level data with ``timeStamp`` parsed as dates
        and values of -999 replaced with NaN
    """
    # set default query parameters
    # (setdefault appends missing keys after the ones from the caller)
    defaults = (("unit", 0), ("timeZone", 0), ("datum", "MSL"))
    for parameter, value in defaults:
        kwargs.setdefault(parameter, value)
    # get water levels for station and date range
    url, namespaces = build_query(api, **kwargs)
    levels = from_xml(
        url,
        xpath=_xpaths[api],
        namespaces=namespaces,
        parse_dates=["timeStamp"],
    )
    # replace invalid water level values with NaN
    return levels.replace(to_replace=[-999], value=np.nan)


@pd.api.extensions.register_dataframe_accessor("tmd")
class DataFrame(Dataset):
    """Accessor for extending a ``pandas.DataFrame`` for tide models

    Registered under the ``tmd`` attribute of ``pandas.DataFrame``
    """

    def __init__(self, df):
        # keep a reference to the wrapped pandas dataframe
        self._df = df
        # initialize the parent class with the converted xarray Dataset
        super().__init__(self.to_dataset())

    def to_dataset(self):
        """Convert NOAA constituent ``Dataframe`` to an ``xarray.Dataset``

        Reads the ``amplitude``, ``phase`` and ``constituent`` columns
        of the dataframe, and expects its index to be named ``constNum``

        Returns
        -------
        ds: xarray.Dataset
            Tide constituent ``Dataset``
        """
        table = self._df
        # complex constituent oscillation(s)
        # (phase is converted from degrees to radians)
        amplitude = table.amplitude
        phase = np.radians(table.phase)
        oscillation = amplitude * np.exp(-1j * phase)
        # convert data series to xarray DataArray with the
        # constituent names assigned as coordinates
        names = {"constituent": table.constituent.values}
        darr = (
            oscillation.to_xarray()
            .rename({"constNum": "constituent"})
            .assign_coords(names)
        )
        # convert DataArray to Dataset with constituents as variables
        return darr.to_dataset(dim="constituent")