Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fill missing values in MRD and MSLP #31

Merged
12 commits merged into from
Apr 11, 2022
198 changes: 99 additions & 99 deletions README.md

Large diffs are not rendered by default.

30 changes: 13 additions & 17 deletions stormevents/nhc/atcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import itertools
from os import PathLike
from pathlib import Path
from typing import Any, Iterable, List, TextIO, Union
from typing import Iterable, List, TextIO, Union

import geopandas
from geopandas import GeoDataFrame
Expand Down Expand Up @@ -298,18 +298,6 @@ def atcf_url(
return url


def normalize_atcf_value(value: Any, to_type: type, round_digits: int = None,) -> Any:
    """
    Coerce a single ATCF field value to ``to_type``, leaving missing values untouched.

    :param value: raw field value; an object whose class is named ``Quantity`` is first replaced by its ``.magnitude``
    :param to_type: type to convert the value to
    :param round_digits: number of digits to round to before conversion; only applied when ``to_type`` is a subclass of ``int`` or ``float``
    :return: the converted value, or the value unchanged if it is missing (``None``, NA / NaN, or an empty or whitespace-only string)
    :raises ValueError: if rounding is requested and a non-blank string value cannot be parsed as a float
    """

    # compare on the class name so that the units library need not be imported here
    if type(value).__name__ == 'Quantity':
        value = value.magnitude

    # missing values are passed through as-is
    if value is None or pandas.isna(value):
        return value

    # a blank field is also missing; checking only `== ''` would let a
    # whitespace-only string reach `float()` below and raise `ValueError`
    if isinstance(value, str) and value.strip() == '':
        return value

    if round_digits is not None and issubclass(to_type, (int, float)):
        if isinstance(value, str):
            # `round()` does not accept strings, so parse numeric text first
            value = float(value)
        value = round(value, round_digits)

    return typepigeon.convert_value(value, to_type)


def read_atcf(
atcf: Union[PathLike, io.BytesIO, TextIO],
advisories: List[ATCF_Advisory] = None,
Expand Down Expand Up @@ -340,11 +328,19 @@ def read_atcf(
for line in lines
)

data = DataFrame.from_records(lines, columns=list(ATCF_FIELDS),).astype(
{field: 'string' for field in ATCF_FIELDS}
data = DataFrame.from_records(lines)
data.rename(
columns={index: list(ATCF_FIELDS)[index] for index in range(len(data.columns))},
inplace=True,
)
for column in ATCF_FIELDS:
if column not in data.columns:
data[column] = pandas.NA
data.astype(
{field: 'string' for field in data.columns}, copy=False,
)

if data['USERDEFINED'].str.contains(',').any():
if 'USERDEFINED' in data and data['USERDEFINED'].str.contains(',').any():
if fort_22:
extra_fields = FORT_22_FIELDS
else:
Expand All @@ -366,7 +362,7 @@ def read_atcf(
except ValueError:
pass

if advisories is not None:
if advisories is not None and len(advisories) > 0:
data = data[data['TECH'].isin(advisories)]
if len(data) == 0:
raise ValueError(f'no ATCF records found matching "{advisories}"')
Expand Down
66 changes: 50 additions & 16 deletions stormevents/nhc/storms.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import datetime
from functools import lru_cache
import re
from typing import Iterable, List
from typing import Iterable

from bs4 import BeautifulSoup
import numpy
Expand All @@ -28,12 +28,12 @@ def nhc_storms(year: int = None) -> pandas.DataFrame:
AL051851 UNNAMED TS 1851 AL 5 ARCHIVE 1851-09-13 00:00:00 1851-09-16 18:00:00
AL061851 UNNAMED TS 1851 AL 6 ARCHIVE 1851-10-16 00:00:00 1851-10-19 18:00:00
... ... ... ... ... ... ... ... ...
CP902021 INVEST LO 2021 CP 90 METWATCH 2021-07-24 12:00:00 NaT
CP912021 INVEST DB 2021 CP 91 METWATCH 2021-08-07 18:00:00 NaT
EP922021 INVEST DB 2021 EP 92 METWATCH 2021-06-05 06:00:00 NaT
AL952021 INVEST DB 2021 AL 95 METWATCH 2021-10-28 12:00:00 NaT
AL962021 INVEST EX 2021 AL 96 METWATCH 2021-11-07 12:00:00 NaT
EP712022 GENESIS001 DB 2022 EP 71 GENESIS 2022-01-20 12:00:00 NaT
EP902022 INVEST LO 2022 EP 90 METWATCH 2022-01-20 12:00:00 NaT
[2729 rows x 8 columns]
[2714 rows x 8 columns]
"""

url = 'https://ftp.nhc.noaa.gov/atcf/index/storm_list.txt'
Expand Down Expand Up @@ -95,27 +95,29 @@ def nhc_storms(year: int = None) -> pandas.DataFrame:
else:
storms = storms[storms['year'] == int(year)]

for string_column in ['nhc_code', 'name', 'class', 'source']:
storms[string_column] = storms[string_column].str.strip()

storms['nhc_code'] = storms['nhc_code'].str.strip()
storms.set_index('nhc_code', inplace=True)

gis_storms = nhc_storms_gis_archive(year=year)
gis_storms = gis_storms.drop(gis_storms[gis_storms.index.isin(storms.index)].index)
if len(gis_storms) > 0:
gis_storms[['start_date', 'end_date']] = pandas.to_datetime(numpy.nan)
storms = pandas.concat([storms, gis_storms[storms.columns]])
gis_archive_storms = nhc_storms_gis_archive(year=year)
gis_archive_storms = gis_archive_storms.drop(
gis_archive_storms[gis_archive_storms.index.isin(storms.index)].index
)
if len(gis_archive_storms) > 0:
gis_archive_storms[['start_date', 'end_date']] = pandas.to_datetime(numpy.nan)
storms = pandas.concat([storms, gis_archive_storms[storms.columns]])

for string_column in ['name', 'class', 'source']:
storms.loc[storms[string_column].str.len() == 0, string_column] = None
storms.loc[storms[string_column].str.len() == 0, string_column] = pandas.NA
storms[string_column] = storms[string_column].str.strip()
storms[string_column] = storms[string_column].astype('string')

storms.sort_values(['year', 'number', 'basin'], inplace=True)

return storms


@lru_cache(maxsize=None)
def nhc_storms_archive(year: int = None) -> List[str]:
def nhc_storms_archive(year: int = None) -> pandas.DataFrame:
url = 'https://ftp.nhc.noaa.gov/atcf/archive/storm.table'

columns = [
Expand Down Expand Up @@ -144,10 +146,37 @@ def nhc_storms_archive(year: int = None) -> List[str]:

storms = pandas.read_csv(url, header=0, names=columns)

storms = storms[
[
'nhc_code',
'name',
'class',
'year',
'basin',
'number',
'source',
'start_date',
'end_date',
]
]

if year is not None:
storms = storms[storms['year'] == year]
if isinstance(year, Iterable) and not isinstance(year, str):
storms = storms[storms['year'].isin(year)]
else:
storms = storms[storms['year'] == int(year)]

storms['nhc_code'] = storms['nhc_code'].str.strip()
storms.set_index('nhc_code', inplace=True)

storms.sort_values(['year', 'number', 'basin'], inplace=True)

for string_column in ['name', 'class', 'source']:
storms.loc[storms[string_column].str.len() == 0, string_column] = pandas.NA
storms[string_column] = storms[string_column].str.strip()
storms[string_column] = storms[string_column].astype('string')

return storms['nhc_code'].str.strip().to_list()
return storms


@lru_cache(maxsize=None)
Expand Down Expand Up @@ -228,4 +257,9 @@ def nhc_storms_gis_archive(year: int = None) -> pandas.DataFrame:

storms.sort_values(['year', 'basin', 'number'], inplace=True)

for string_column in ['name', 'class', 'source']:
storms.loc[storms[string_column].str.len() == 0, string_column] = pandas.NA
storms[string_column] = storms[string_column].str.strip()
storms[string_column] = storms[string_column].astype('string')

return storms[['name', 'class', 'year', 'basin', 'number', 'source']]
Loading