Skip to content

Commit

Permalink
Support multiple start/end dates
Browse files Browse the repository at this point in the history
  • Loading branch information
abdu558 committed Jul 28, 2024
1 parent dc23d01 commit 3d439ab
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 34 deletions.
49 changes: 40 additions & 9 deletions examples/NDBC_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"import hvplot.pandas\n",
"import pandas as pd\n",
"\n",
"from searvey._ndbc_api import fetch_ndbc_station,get_ndbc_stations\n",
"from searvey._ndbc_api import fetch_ndbc_station,get_ndbc_stations,_fetch_ndbc\n",
"logging.basicConfig(\n",
" level=20,\n",
" style=\"{\",\n",
Expand Down Expand Up @@ -59,7 +59,7 @@
"def plot_map(data, title):\n",
"\n",
" # Plot the world map\n",
" world_plot = data.hvplot(geo=True, tiles=True, hover_cols=[\"Station\", \"location\"])\n",
" world_plot = data.hvplot(geo=True, tiles=True, hover_cols=[\"Station\", \"location\"], title=title)\n",
" return world_plot.opts(width=800, height=500)\n",
"\n",
"\n",
Expand Down Expand Up @@ -144,18 +144,16 @@
"end_date = \"2023-01-10\"\n",
"\n",
"# Get data for selected stations (mode = 'stdmet' for standard meteorological data)\n",
"station_ids = east_coast_stations[\"Station\"].tolist()[1:3]\n",
"data_df = fetch_ndbc_station(\n",
" station_id=\"SRST2\",\n",
" mode=\"stdmet\",\n",
" start_date=start_date,\n",
" end_date=end_date,\n",
")\n",
"\n",
"e\n",
"#output is a dictionary of {station id : data}\n",
"data_to_plot = data_df[\"SRST2\"]\n",
"data_to_plot"
"# Remove columns with all NaN values (unavailable data)\n",
"data_df = data_df.dropna(axis=1, how='all')\n",
"data_df"
]
},
{
Expand All @@ -172,7 +170,40 @@
"outputs": [],
"source": [
"# Plot the data of data_to_plot\n",
"data_to_plot[[\"WSPD\", \"GST\", \"ATMP\"]].rename(columns={\"WSPD\": \"Wind Speed\", \"GST\": \"Wind Gust\", \"ATMP\": \"Air Temperature\"}).hvplot(grid=True)"
"data_df[[\"WSPD\", \"GST\", \"ATMP\"]].rename(columns={\"WSPD\": \"Wind Speed\", \"GST\": \"Wind Gust\", \"ATMP\": \"Air Temperature\"}).hvplot(grid=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get data from multiple station id using the internal method\n",
"\n",
"The internal method supports multiple start dates, this system works exactly the same as other data sources, where each item in the dates arrays correspond to the item in the station id.\n",
"\n",
"In the example below, data between the days 2023-01-10 and 2023-01-12 is fetched for station SRST2, data between 2023-01-12 and 2023-01-20 is fetched for station AGXC1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#You can get the stations in the region and store it in a list,this example stores 2 stations in a list. This can be used in the fetch ndbc function to get data for the stations\n",
"station_ids = east_coast_stations[\"Station\"].tolist()[1:3]\n",
"\n",
"data = _fetch_ndbc(\n",
" station_ids=[\"SRST2\",\"AGXC1\",\"BAXC1\"],\n",
" mode=\"stdmet\",\n",
" start_dates=[\"2023-01-10\", \"2023-01-12\", \"2023-01-14\"],\n",
" end_dates=[\"2023-01-12\", \"2023-01-20\", \"2023-01-30\"],\n",
" columns=[\"WSPD\", \"GST\", \"ATMP\"],\n",
")\n",
"\n",
"data\n",
"#output is a dictionary of {station id : data}\n",
"#data_df[\"SRST2\"] to get the data for station SRST2"
]
}
],
Expand All @@ -192,7 +223,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
"version": "3.10.12"
}
},
"nbformat": 4,
Expand Down
58 changes: 42 additions & 16 deletions searvey/_ndbc_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import functools
import logging
from typing import List
from typing import List, Union

import geopandas as gpd
import multifutures
Expand Down Expand Up @@ -84,8 +84,8 @@ def get_ndbc_stations(
def _fetch_ndbc(
station_ids: List[str],
mode: str,
start_date: DatetimeLike | None = None,
end_date: DatetimeLike | None = None,
start_dates: Union[DatetimeLike, List[DatetimeLike]] = None,
end_dates: Union[DatetimeLike, List[DatetimeLike]] = None,
columns: list[str] | None = None,
multithreading_executor: multifutures.ExecutorProtocol | None = None,
) -> dict[str, pd.DataFrame]:
Expand All @@ -94,25 +94,35 @@ def _fetch_ndbc(
:param station_ids: A list of station identifiers.
:param mode: Data mode. One of ``'txt'``, ``'json'``, ``'spec'``.
:param start_date: The starting date of the query. Defaults to 7 days ago.
:param end_date: The finishing date of the query. Defaults to "now".
:param start_dates: The starting date of the query. Defaults to 7 days ago.
:param end_dates: The finishing date of the query. Defaults to "now".
:param columns:
:param multithreading_executor: A multithreading executor.
:return: A dictionary mapping station identifiers to their respective
TimeSeries.
"""
now = pd.Timestamp.now("utc")

# Ensure start_dates and end_dates are lists
if not isinstance(start_dates, list):
start_dates = [start_dates] * len(station_ids)
if not isinstance(end_dates, list):
end_dates = [end_dates] * len(station_ids)

# Ensure that each station has a start_date and end_date
if len(start_dates) != len(station_ids) or len(end_dates) != len(station_ids):
raise ValueError("Each station must have a start_date and end_date")

# Prepare arguments for each function call
func_kwargs = [
{
"station_id": station_id,
"mode": mode,
"start_time": _resolve_start_date(now, start_date)[0],
"end_time": _resolve_end_date(now, end_date)[0],
"start_date": _resolve_start_date(now, start_dates)[0],
"end_date": _resolve_end_date(now, end_dates)[0],
"columns": columns,
}
for station_id in station_ids
for station_id, start_dates, end_dates in zip(station_ids, start_dates, end_dates)
]

# Fetch data concurrently using multithreading
Expand All @@ -124,35 +134,51 @@ def _fetch_ndbc(

# Check for errors and collect results
multifutures.check_results(results)

dataframes = {
result.kwargs["station_id"]: result.result for result in results if result.exception is None # type: ignore[index]

}
return dataframes


def fetch_ndbc_station(
station_id: str,
mode: str,
start_time: pd.Timestamp,
end_time: pd.Timestamp,
start_date: pd.Timestamp,
end_date: pd.Timestamp,
columns: list[str] | None = None,
) -> pd.DataFrame:
"""Retrieve the TimeSeries of a single NDBC station."""
"""
Retrieve the TimeSeries of a single NDBC station.
Make a query to the NDBC API for data for ``station_id``
and return the results as a pandas dataframe.
:param station_id: The station identifier.
:param mode: Data mode. Read the example ndbc file for more info.
:param start_date: The starting date of the query.
:param end_date: The finishing date of the query.
:param columns: List of columns to retrieve.
:return: ``pandas.DataFrame`` with the station data.
"""
logger.info("NDBC-%s: Starting data retrieval: %s - %s", station_id, start_date, end_date)
try:
df = ndbc_api.get_data(
station_id=station_id,
mode=mode,
start_time=start_time,
end_time=end_time,
start_time=start_date,
end_time=end_date,
as_df=True,
cols=columns,
)

if df.empty:
logger.warning(f"No data available for station {station_id}")

logger.info("NDBC-%s: Finished data retrieval: %s - %s", station_id, start_date, end_date)

return df

except Exception as e:
logger.error(f"Error fetching data for station {station_id}: {str(e)}")
return pd.DataFrame()

return pd.DataFrame()
18 changes: 9 additions & 9 deletions tests/ndbc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ def test_fetch_ndbc_station_data():
station_id="SRST2",
mode="stdmet",
# test that both formats work
start_date=datetime.date(2023, 1, 1),
end_date="2023-01-10",
start_dates=datetime.date(2023, 1, 1),
end_dates="2023-01-10"
)

assert isinstance(dataframes, dict)
Expand Down Expand Up @@ -70,7 +70,7 @@ def test_fetch_ndbc_station_data():
assert df.index[-1] == pd.to_datetime("2023-01-10 00:00:00")


def test__fetch_ndbc_data_multiple():
def test_fetch_ndbc_data_multiple():
"""
This test will attempt to get data for multiple stations.
"""
Expand All @@ -79,8 +79,8 @@ def test__fetch_ndbc_data_multiple():
station_ids=["STDM4", "TPLM2"],
mode="stdmet",
# test that both formats work
start_date=datetime.date(2023, 1, 1),
end_date="2023-01-10",
start_dates=[datetime.date(2023, 1, 1),"2023-01-01"],
end_dates=["2023-01-10","2023-01-20"]
)

assert isinstance(dataframes, dict)
Expand Down Expand Up @@ -133,16 +133,16 @@ def test__fetch_ndbc_data_multiple():
assert df1.index[0] == pd.to_datetime("2023-01-01 00:00:00")


def test__fetch_ndbc_data_multiple_unavaliable_avaliable_data():
def test_fetch_ndbc_data_multiple_unavaliable_avaliable_data():
"""
This is a test that makes sure that the function can handle when some stations have data and some don't.
"""
dataframes = ndbc._fetch_ndbc(
station_ids=["41001", "STDM4"],
mode="stdmet",
# test that both formats work
start_date=datetime.date(2023, 1, 1),
end_date="2023-01-10",
start_dates=[datetime.date(2023, 1, 1),"2023-01-01"],
end_dates=["2023-01-10","2023-01-20"]
)

assert isinstance(dataframes, dict)
Expand Down Expand Up @@ -172,4 +172,4 @@ def test__fetch_ndbc_data_multiple_unavaliable_avaliable_data():
]
)
assert df.index[0] == pd.to_datetime("2023-01-01 00:00:00")
assert df.index[-1] == pd.to_datetime("2023-01-10 00:00:00")
assert df.index[-1] == pd.to_datetime("2023-01-10 00:00:00")

0 comments on commit 3d439ab

Please sign in to comment.