diff --git a/ndbc_api/ndbc_api.py b/ndbc_api/ndbc_api.py index 8720852..fdacae1 100644 --- a/ndbc_api/ndbc_api.py +++ b/ndbc_api/ndbc_api.py @@ -30,7 +30,7 @@ import warnings from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime, timedelta -from typing import Any, List, Sequence, Tuple, Union, Dict +from typing import Any, List, Sequence, Tuple, Union, Dict, Optional import xarray import pandas as pd @@ -380,10 +380,13 @@ def station(self, except (ResponseException, ValueError, KeyError) as e: raise ResponseException('Failed to handle returned data.') from e - def available_realtime(self, - station_id: Union[str, int], - as_df: bool = False) -> Union[pd.DataFrame, dict]: - """Get the available realtime measurements for a station. + def available_realtime( + self, + station_id: Union[str, int], + full_response: bool = False, + as_df: Optional[bool] = None, + ) -> Union[List[str], pd.DataFrame, dict]: + """Get the available realtime modalities for a station. While most data buoy (station) measurements are available over multi-year time ranges, some measurements depreciate or become @@ -394,6 +397,10 @@ def available_realtime(self, Args: station_id: The NDBC station ID (e.g. `'tplm2'` or `41001`) for the station of interest. + full_response: Whether to return the full response from the NDBC + API, defaults to `False` and a list of modes from `get_modes()` + is returned. If `True`, the full URL for each data mode is + included in the returned `dict` or `pandas.DataFrame`. as_df: Whether to return station-level data as a `pandas.DataFrame`, defaults to `False`, and a `dict` is returned. 
@@ -408,9 +415,29 @@ def available_realtime(self, """ station_id = self._parse_station_id(station_id) try: - data = self._stations_api.realtime(handler=self._handler, - station_id=station_id) - return self._handle_data(data, as_df, cols=None) + station_realtime = self._stations_api.realtime( + handler=self._handler, station_id=station_id) + full_data = {} + if full_response: + if as_df is None: + as_df = False + full_data = self._handle_data(station_realtime, + as_df, + cols=None) + return full_data + else: + full_data = self._handle_data(station_realtime, + as_df=False, + cols=None) + + # Parse the modes from the full response + _modes = self.get_modes() + station_modes = set() + for k in full_data: + for m in _modes: + if m in full_data[k]['description']: + station_modes.add(m) + return list(station_modes) except (ResponseException, ValueError, KeyError) as e: raise ResponseException('Failed to handle returned data.') from e @@ -456,7 +483,8 @@ def get_data( cols: List[str] = None, station_ids: Union[Sequence[Union[int, str]], None] = None, modes: Union[List[str], None] = None, - use_opendap: bool = False, + as_xarray_dataset: bool = False, + use_opendap: Optional[bool] = None, ) -> Union[pd.DataFrame, xarray.Dataset, dict]: """Execute data query against the specified NDBC station(s). @@ -487,10 +515,14 @@ def get_data( service column headers as a timestamp, and to use this timestamp as the index. as_df: Whether to return station-level data as a `pandas.DataFrame`, - defaults to `False`, and a `dict` is returned. + defaults to `True`, if `False` a `dict` is returned unless + `as_xarray_dataset` is set to `True`. + as_xarray_dataset: Whether to return the data as an `xarray.Dataset`, + defaults to `False`. cols: A list of columns of interest which are selected from the available data columns, such that only the desired columns are returned. All columns are returned if `None` is specified. + use_opendap: An alias for `as_xarray_dataset`. 
Returns: The available station(s) measurements for the specified modes, time @@ -507,6 +539,11 @@ def get_data( HandlerException: There was an error in handling the returned data as a `dict` or `pandas.DataFrame`. """ + if use_opendap is not None: + as_xarray_dataset = use_opendap + + as_df = as_df and not as_xarray_dataset + self.log(logging.DEBUG, message=f"`get_data` called with arguments: {locals()}") if station_id is None and station_ids is None: @@ -532,7 +569,7 @@ def get_data( handle_modes.extend(modes) for mode in handle_modes: - if mode not in self.get_modes(use_opendap): + if mode not in self.get_modes(use_opendap=as_xarray_dataset): raise RequestException(f"Mode {mode} is not available.") self.log(logging.INFO, @@ -559,7 +596,7 @@ def get_data( use_timestamp=use_timestamp, as_df=as_df, cols=cols, - use_opendap=use_opendap, + use_opendap=as_xarray_dataset, ) for future in as_completed(station_futures.values()): @@ -579,22 +616,28 @@ def get_data( self.log( level=logging.WARN, station_id=station_id, - message=(f"Failed to process request for station_id " - f"{station_id} with error: {e}")) + message=( + f"Failed to process request for station_id " + f"{station_id} with error: {e}")) self.log(logging.INFO, message="Finished processing request.") return self._handle_accumulate_data(accumulated_data) - - def get_modes(self, use_opendap: bool = False) -> List[str]: + def get_modes(self, + use_opendap: bool = False, + as_xarray_dataset: Optional[bool] = None) -> List[str]: """Get the list of supported modes for `get_data(...)`. Args: use_opendap (bool): Whether to return the available - modes for opendap (NetCDF) data. - + modes for opendap `xarray.Dataset` data. + as_xarray_dataset (bool): An alias for `use_opendap`. + Returns: (List[str]) the available modalities. 
""" + if as_xarray_dataset is not None: + use_opendap = as_xarray_dataset + if use_opendap: return [ v for v in vars(self._opendap_data_api) if not v.startswith('_') @@ -602,16 +645,21 @@ def get_modes(self, use_opendap: bool = False) -> List[str]: return [v for v in vars(self._data_api) if not v.startswith('_')] @staticmethod - def save_netcdf_dataset(dataset: xarray.Dataset, output_filepath: str): + def save_xarray_dataset(dataset: xarray.Dataset, output_filepath: str, + **kwargs) -> None: """ - Saves a netCDF4 dataset from a temporary file to a user-specified file path. + Saves an `xarray.Dataset` to netCDF at a user-specified file path. Args: dataset: The xarray dataset to save. output_filepath: The path to save the dataset to. + **kwargs: Additional keyword arguments to pass to `dataset.to_netcdf`. + + Returns: + None: The dataset is written to disk """ - dataset.to_netcdf(output_filepath) - + dataset.to_netcdf(output_filepath, **kwargs) + """ PRIVATE """ def _get_request_handler( @@ -692,18 +740,23 @@ def _handle_data(data: pd.DataFrame, def _handle_accumulate_data( self, - accumulated_data: Dict[str, List[Union[pd.DataFrame, dict, xarray.Dataset]]], + accumulated_data: Dict[str, List[Union[pd.DataFrame, dict, + xarray.Dataset]]], ) -> Union[pd.DataFrame, dict]: """Accumulate the data from multiple stations and modes.""" for k in list(accumulated_data.keys()): if not accumulated_data[k]: del accumulated_data[k] - + if not accumulated_data: return {} - - return_as_df = isinstance(accumulated_data[list(accumulated_data.keys())[-1]][0], pd.DataFrame) - use_opendap = isinstance(accumulated_data[list(accumulated_data.keys())[-1]][0], xarray.Dataset) + + return_as_df = isinstance( + accumulated_data[list(accumulated_data.keys())[-1]][0], + pd.DataFrame) + use_opendap = isinstance( + accumulated_data[list(accumulated_data.keys())[-1]][0], + xarray.Dataset) data: Union[List[pd.DataFrame], List[xarray.Dataset], dict] = [] if return_as_df or use_opendap else {} diff --git 
a/notebooks/overview.ipynb b/notebooks/overview.ipynb index 45c4fc5..cb5bc80 100644 --- a/notebooks/overview.ipynb +++ b/notebooks/overview.ipynb @@ -192,7 +192,7 @@ "
1344 rows × 12 columns
\n", + "1345 rows × 12 columns
\n", "" ], "text/plain": [ @@ -330,11 +330,11 @@ "3 13008 15.000 -38.000 0.0 \n", "4 13009 8.000 -38.000 0.0 \n", "... ... ... ... ... \n", - "1339 yata2 59.548 -139.733126 rows × 13 columns
\n", + "125 rows × 13 columns
\n", "" ], "text/plain": [ " Station Lat Lon Elevation \\\n", - "1279 tplm2 38.899 -76.436 0.0 \n", + "1280 tplm2 38.899 -76.436 0.0 \n", "213 44063 38.963 -76.448 0.0 \n", - "622 apam2 38.983 -76.479 1.4 \n", - "729 cpvm2 38.995 -76.388 3.4 \n", - "678 bslm2 38.781 -76.708 0.3 \n", + "621 apam2 38.983 -76.479 1.4 \n", + "730 cpvm2 38.995 -76.388 3.4 \n", + "676 bslm2 38.781 -76.708 0.3 \n", "... ... ... ... ... \n", - "693 cblo1 41.981 -80.556 177.0 \n", + "692 cblo1 41.981 -80.556 177.0 \n", "270 45142 42.740 -79.290 174.0 \n", "239 44402 39.314 -70.717 0.0 \n", - "1178 rprn6 43.263 -77.598 75.0 \n", - "1167 rcrn6 43.269 -77.626 76.7 \n", + "1179 rprn6 43.263 -77.598 75.0 \n", + "1168 rcrn6 43.269 -77.626 76.7 \n", "\n", " Name \\\n", - "1279 Thomas Point, MD \n", + "1280 Thomas Point, MD \n", "213 Annapolis, MD \n", - "622 8575512 - Annapolis, MD \n", - "729 8575437 - Chesapeake Bay Bridge Visibility, MD \n", - "678 Jug Bay, Chesapeake Bay Reserve, MD \n", + "621 8575512 - Annapolis, MD \n", + "730 8575437 - Chesapeake Bay Bridge Visibility, MD \n", + "676 Jug Bay, Chesapeake Bay Reserve, MD \n", "... ... \n", - "693 Conneaut Breakwater Light, OH \n", + "692 Conneaut Breakwater Light, OH \n", "270 Port Colborne \n", "239 SOUTHEAST BLOCK CANYON - 130 NM SE of Fire Isl... \n", - "1178 Rochester Coast Guard, NY \n", - "1167 9052058 - Rochester, NY \n", + "1179 Rochester Coast Guard, NY \n", + "1168 9052058 - Rochester, NY \n", "\n", " Owner \\\n", - "1279 NDBC \n", + "1280 NDBC \n", "213 Chesapeake Bay Interpretive Buoy System (CBIBS) \n", - "622 NOS \n", - "729 NOAA NOS PORTS \n", - "678 National Estuarine Research Reserve System \n", + "621 NOS \n", + "730 NOAA NOS PORTS \n", + "676 National Estuarine Research Reserve System \n", "... ... 
\n", - "693 NWS Eastern Region \n", + "692 NWS Eastern Region \n", "270 Environment and Climate Change Canada \n", "239 NDBC \n", - "1178 NWS Eastern Region \n", - "1167 NOS \n", + "1179 NWS Eastern Region \n", + "1168 NOS \n", "\n", " Program Type Includes Meteorology \\\n", - "1279 NDBC Meteorological/Ocean fixed True \n", - "213 IOOS Partners buoy True \n", - "622 NOS/CO-OPS fixed True \n", - "729 NOS/CO-OPS fixed True \n", - "678 NERRS fixed True \n", + "1280 NDBC Meteorological/Ocean fixed True \n", + "213 IOOS Partners buoy False \n", + "621 NOS/CO-OPS fixed True \n", + "730 NOS/CO-OPS fixed True \n", + "676 NERRS fixed True \n", "... ... ... ... \n", - "693 IOOS Partners fixed True \n", - "270 International Partners buoy True \n", + "692 IOOS Partners fixed True \n", + "270 International Partners buoy False \n", "239 Tsunami dart False \n", - "1178 IOOS Partners fixed True \n", - "1167 NOS/CO-OPS fixed True \n", + "1179 IOOS Partners fixed True \n", + "1168 NOS/CO-OPS fixed True \n", "\n", " Includes Currents Includes Water Quality DART Program distance \n", - "1279 False False False 2.177979 \n", - "213 True True False 9.369909 \n", - "622 False False False 12.225590 \n", - "729 False False False 13.307985 \n", - "678 False False False 26.506690 \n", + "1280 False False False 2.177979 \n", + "213 False False False 9.369909 \n", + "621 False False False 12.225590 \n", + "730 False False False 13.307985 \n", + "676 False False False 26.506690 \n", "... ... ... ... ... 
\n", - "693 False False False 491.192873 \n", + "692 False False False 491.192873 \n", "270 False False False 492.561301 \n", "239 False False True 495.825675 \n", - "1178 False False False 497.635930 \n", - "1167 False False False 498.756609 \n", + "1179 False False False 497.635930 \n", + "1168 False False False 498.756609 \n", "\n", - "[126 rows x 13 columns]" + "[125 rows x 13 columns]" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -1030,14 +1042,7 @@ { "data": { "text/plain": [ - "{'Real time hourly standard meteorological': {'data directory': 'https://www.ndbc.noaa.gov/data/hourly2/',\n", - " 'description': 'https://www.ndbc.noaa.gov/faq/measdes.shtml#cwind'},\n", - " 'Real time standard meteorological data': {'Real time standard meteorological data': 'https://www.ndbc.noaa.gov/data/realtime2/TPLM2.txt',\n", - " 'description': 'https://www.ndbc.noaa.gov/faq/measdes.shtml#stdmet'},\n", - " 'Real time continuous winds data': {'Real time continuous winds data': 'https://www.ndbc.noaa.gov/data/realtime2/TPLM2.cwind',\n", - " 'description': 'https://www.ndbc.noaa.gov/faq/measdes.shtml#cwind'},\n", - " 'Real time derived measurements data': {'Real time derived measurements data': 'https://www.ndbc.noaa.gov/data/derived2/TPLM2.dmv',\n", - " 'description': 'https://www.ndbc.noaa.gov/faq/measdes.shtml#deriv'}}" + "['cwind', 'stdmet']" ] }, "execution_count": 13, @@ -1049,13 +1054,6 @@ "api.available_realtime(station_id=\"tplm2\")" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the response above, the `mode`s which are available for station `\"tplm2\"` are reported in plain text. Their indicators (`stdmet`, `cwind`) are also specified as the suffix of the URLS for the data files. 
" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -1090,7 +1088,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -1112,7 +1110,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -1248,7 +1246,7 @@ "TIDE NaN NaN NaN" ] }, - "execution_count": 18, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1259,7 +1257,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -1309,7 +1307,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -1642,7 +1640,7 @@ " Timestamp('2020-01-02 00:00:00'): nan}}]}" ] }, - "execution_count": 20, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1666,7 +1664,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -1692,7 +1690,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -1821,7 +1819,7 @@ "TIDE NaN NaN NaN" ] }, - "execution_count": 22, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1839,7 +1837,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -32878,7 +32876,7 @@ " ...}}]}" ] }, - "execution_count": 24, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -32906,23 +32904,23 @@ "source": [ "The NDBC API now supports retrieving data across stations and modes in the native `DODS NetCDF4` [format](https://dods.ndbc.noaa.gov/) provided by [THREDDS](https://dods.ndbc.noaa.gov/thredds/catalog/data/catalog.html). 
This data is retrieved from the DODS server through HTTPS, with the resulting data matching the quality-controlled records as closely as possible.\n", "\n", - "Data retrieval through THREDDS is controlled using the `use_opendap` argument (defaults to `False`).\n", + "Data retrieval through THREDDS is controlled using the `as_xarray_dataset` argument (defaults to `False`).\n", "\n", "As with the standard usage of the `get_data` method, the `start_time`, `end_time`, and `cols` arguments can be used to efficiently filter data during retrieval." ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ - "ds = api.get_data(station_ids=[\"tplm2\"], modes=[\"stdmet\"], start_time=\"2019-06-01\", end_time=\"2024-06-01\", use_opendap=True)" + "ds = api.get_data(station_ids=[\"tplm2\"], modes=[\"stdmet\"], start_time=\"2019-06-01\", end_time=\"2024-06-01\", as_xarray_dataset=True)" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -33303,7 +33301,7 @@ " * time (time) datetime64[ns] 319kB 2019-06-01 ... 2023-...\n", " * latitude (latitude) float32 4B 38.9\n", " * longitude (longitude) float32 4B -76.44\n", - "Data variables: (12/14)\n", + "Data variables: (12/13)\n", " wind_dir (time, latitude, longitude) float64 319kB 92.0 ....\n", " wind_spd (time, latitude, longitude) float32 159kB 1.8 .....\n", " gust (time, latitude, longitude) float32 159kB 1.9 .....\n", @@ -33311,12 +33309,12 @@ " dominant_wpd (time, latitude, longitude) timedelta64[ns] 319kB ...\n", " average_wpd (time, latitude, longitude) timedelta64[ns] 319kB ...\n", " ... 
...\n", + " air_pressure (time, latitude, longitude) float32 159kB 1.01e+...\n", " air_temperature (time, latitude, longitude) float32 159kB 25.5 ....\n", " sea_surface_temperature (time, latitude, longitude) float32 159kB 23.7 ....\n", " dewpt_temperature (time, latitude, longitude) float32 159kB 17.2 ....\n", " visibility (time, latitude, longitude) float32 159kB nan .....\n", " water_level (time, latitude, longitude) float32 159kB nan .....\n", - " station_id <U5 20B 'tplm2'\n", "Attributes:\n", " institution: NOAA National Data Buoy Center and Participators in Data As...\n", " url: http://dods.ndbc.noaa.gov\n", @@ -33324,10 +33322,10 @@ " conventions: COARDS\n", " station: tplm2\n", " comment: Thomas Point, MD\n", - " location: 38.899 N 76.436 W