Skip to content

Commit

Permalink
Robustify metobs table parsing and update integration tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mgcth committed Mar 25, 2024
1 parent 5357a92 commit 6d93a91
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 13 deletions.
48 changes: 41 additions & 7 deletions src/smhi/metobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,12 +356,15 @@ def __init__(
self.to = model.to

data_model = self._get_data(model.data)
stationdata = data_model.stationdata
stationdata = self._clean_columns(stationdata)
stationdata = self._drop_nan(data_model.stationdata)

if data_model.station is not None:
stationdata = self._set_dataframe_index(data_model.stationdata)
else:
pass
if (
self._has_datetime_columns(data_model.stationdata) is True
and not stationdata.empty
):
stationdata = self._set_dataframe_index(stationdata)

self.station = data_model.station
self.parameter = data_model.parameter
Expand Down Expand Up @@ -459,15 +462,46 @@ def _set_dataframe_index(self, stationdata: pd.DataFrame) -> pd.DataFrame:

return stationdata

def _drop_nan(self, df: pd.DataFrame, n_cols: int = 2) -> pd.DataFrame:
    """Drop rows whose first ``n_cols`` columns are all NaN.

    The dataframe is modified in place and the same object is returned.

    Args:
        df: dataframe to clean; it is mutated in place
        n_cols: number of leading columns inspected for NaN

    Returns:
        the same dataframe with the all-NaN rows removed
    """
    # Deliberately in place: the object handed in by the caller is updated as
    # well, so code that keeps a reference to it sees the cleaned rows.
    leading_columns = df.columns[:n_cols]
    df.dropna(axis="index", how="all", subset=leading_columns, inplace=True)

    return df

def _clean_columns(self, df: pd.DataFrame) -> pd.DataFrame:
    """Clean dataframe from non-data columns.

    The last column is always dropped, and so is every remaining column
    whose label contains "unnamed" (case-insensitive). The input dataframe
    is left untouched.

    Args:
        df: dataframe

    Returns:
        new dataframe holding only the remaining columns
    """
    trimmed = df.iloc[:, :-1]
    # str() keeps the check safe for non-string labels such as integers.
    unnamed = [col for col in trimmed.columns if "unnamed" in str(col).lower()]

    # Non-inplace drop: mutating the iloc slice in place triggers pandas'
    # chained-assignment warning and makes ownership of the data unclear.
    return trimmed.drop(columns=unnamed)

def _has_datetime_columns(self, df: pd.DataFrame) -> bool:
    """Check if dataframe has any datetime column.

    A column counts as a datetime column when its label appears in
    ``self.metobs_parameter_tim``, ``self.metobs_parameter_dygn`` or
    ``self.metobs_parameter_manad``.

    Args:
        df: dataframe

    Returns:
        True if at least one of those labels is a column of ``df``
    """
    label_groups = (
        self.metobs_parameter_tim,
        self.metobs_parameter_dygn,
        self.metobs_parameter_manad,
    )
    # Test membership itself rather than the truthiness of the matched
    # labels, and stop at the first hit instead of building a list.
    return any(label in df.columns for group in label_groups for label in group)
29 changes: 25 additions & 4 deletions tests/integration/test_integration_metobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import pandas as pd
import pytest
from smhi.metobs import Data, Parameters, Periods, Stations
from smhi.models.metobs_parameters import ParameterItem

METOBS_INTEGRATION = {}
for i in [1, 2, 22]:
Expand All @@ -31,7 +32,12 @@ class TestIntegrationMetobs:
"metobs",
"Meteorologiska observationer från SMHI: Välj "
+ "version (sedan parameter, station och tidsutsnitt)",
("1", "Lufttemperatur", "momentanvärde, 1 gång/tim"),
ParameterItem(
key="1",
title="Lufttemperatur",
summary="momentanvärde, 1 gång/tim",
unit="celsius",
),
(1, "Akalla"),
"corrected-archive",
"Lufttemperatur - Akalla - Kvalitetskontrollerade historiska "
Expand All @@ -48,7 +54,12 @@ class TestIntegrationMetobs:
"metobs",
"Meteorologiska observationer från SMHI: Välj "
+ "version (sedan parameter, station och tidsutsnitt)",
("1", "Lufttemperatur", "momentanvärde, 1 gång/tim"),
ParameterItem(
key="1",
title="Lufttemperatur",
summary="momentanvärde, 1 gång/tim",
unit="celsius",
),
(192840, "Karesuando A"),
"corrected-archive",
"Lufttemperatur - Karesuando A - Kvalitetskontrollerade "
Expand All @@ -65,7 +76,12 @@ class TestIntegrationMetobs:
"metobs",
"Meteorologiska observationer från SMHI: Välj "
+ "version (sedan parameter, station och tidsutsnitt)",
("2", "Lufttemperatur", "medelvärde 1 dygn, 1 gång/dygn, kl 00"),
ParameterItem(
key="2",
title="Lufttemperatur",
summary="medelvärde 1 dygn, 1 gång/dygn, kl 00",
unit="celsius",
),
(192840, "Karesuando A"),
"corrected-archive",
"Lufttemperatur - Karesuando A - Kvalitetskontrollerade "
Expand All @@ -82,7 +98,12 @@ class TestIntegrationMetobs:
"metobs",
"Meteorologiska observationer från SMHI: Välj "
+ "version (sedan parameter, station och tidsutsnitt)",
("22", "Lufttemperatur", "medel, 1 gång per månad"),
ParameterItem(
key="22",
title="Lufttemperatur",
summary="medel, 1 gång per månad",
unit="celsius",
),
(192840, "Karesuando A"),
"corrected-archive",
"Lufttemperatur - Karesuando A - Kvalitetskontrollerade "
Expand Down
32 changes: 30 additions & 2 deletions tests/unit/test_unit_metobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def setup_periods(setup_stations):

@pytest.fixture
def setup_periods_set(setup_stations):
"""Read in Periods from stationset response.
"""Read in Periods for station_set.
Returns:
mocked response
Expand All @@ -206,7 +206,7 @@ def setup_periods_set(setup_stations):
"""
_, mocked_model_stations, _, _ = setup_stations

mocked_response = get_response("tests/fixtures/metobs/periods.txt")
mocked_response = get_response("tests/fixtures/metobs/periods_set.txt")
mocked_model = PeriodModel.model_validate_json(mocked_response.content)
mocked_data = get_data("tests/fixtures/metobs/periods_data.json", "period")

Expand Down Expand Up @@ -250,6 +250,34 @@ def setup_data(setup_periods):
)


@pytest.fixture
def setup_data_set(setup_periods_set):
    """Read in Data response for station_set.

    Args:
        setup_periods_set: value of the ``setup_periods_set`` fixture, a
            four-element sequence of which only the second element (the
            periods model) is used here

    Returns:
        mocked response
        mocked data model validated from the response content
        mocked periods model taken from ``setup_periods_set``
        mocked csv data
        mocked parameter data
        mocked data
    """
    # The fixture must be requested by name so pytest injects its value;
    # referring to the bare module-level name would yield the fixture
    # function itself, which cannot be unpacked.
    _, mocked_model_periods, _, _ = setup_periods_set

    mocked_response = get_response("tests/fixtures/metobs/data.txt", encode=True)
    mocked_model = DataModel.model_validate_json(mocked_response.content)
    mocked_csv_data = get_data("tests/fixtures/metobs/data_csv.csv", "data")

    mocked_parameter = get_data("tests/fixtures/metobs/data_parameter.json")
    mocked_data = get_data("tests/fixtures/metobs/data_data.json")

    return (
        mocked_response,
        mocked_model,
        mocked_model_periods,
        mocked_csv_data,
        mocked_parameter,
        mocked_data,
    )


class TestUnitBaseMetobs:
"""Unit tests for BaseMetobs class."""

Expand Down

0 comments on commit 6d93a91

Please sign in to comment.