Skip to content

Commit

Permalink
simplified month_hour data binning
Browse files Browse the repository at this point in the history
  • Loading branch information
tg359 authored and jamesramsden-bh committed Aug 7, 2023
1 parent 941bc69 commit 1fad56c
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 153 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import sys
from pathlib import Path

import matplotlib.pyplot as plt

# override "HOME" in case IT has set this to something other than default
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
import calendar
import warnings
from typing import Any, List, Tuple, Union

import numpy as np
import pandas as pd
from ladybug.datacollection import HourlyContinuousCollection
from ladybug.datatype.temperature import \
UniversalThermalClimateIndex as LB_UniversalThermalClimateIndex
from ladybug.datatype.temperature import (
UniversalThermalClimateIndex as LB_UniversalThermalClimateIndex,
)
from ladybug.epw import EPW
from ladybug_comfort.collection.solarcal import OutdoorSolarCal
from ladybug_comfort.collection.utci import UTCI
from scipy.interpolate import interp1d, interp2d

from ...categorical.categories import UTCI_DEFAULT_CATEGORIES, Categorical
from ...helpers import evaporative_cooling_effect
from ...ladybug_extension.datacollection import (collection_from_series,
collection_to_series)
from ...helpers import evaporative_cooling_effect, month_hour_binned_series
from ...ladybug_extension.datacollection import (
collection_from_series,
collection_to_series,
)
from ...plot.utilities import contrasting_color


Expand Down Expand Up @@ -489,23 +491,18 @@ def feasible_utci_category_limits(
return df


def month_time_binned_table(
def month_hour_binned(
utci_data: Union[pd.Series, HourlyContinuousCollection],
month_bins: Tuple[Tuple[int]] = ((3, 4, 5), (6, 7, 8), (9, 10, 11), (12, 1, 2)),
hour_bins: Tuple[Tuple[int]] = (
(5, 6, 7, 8, 9, 10, 11),
(12, 13, 14, 15, 16),
(17, 18, 19, 20),
(21, 22, 23, 0, 1, 2, 3, 4),
),
month_bins: Tuple[Tuple[int]] = None,
hour_bins: Tuple[Tuple[int]] = None,
utci_categories: Categorical = UTCI_DEFAULT_CATEGORIES,
color_result: bool = False,
month_labels: List[str] = None,
hour_labels: List[str] = None,
month_labels: Tuple[str] = None,
hour_labels: Tuple[str] = None,
agg: str = "mean",
**kwargs,
) -> pd.DataFrame:
"""Create a table with monthly time binned UTCI data.
"""Create a table with monthly hour binned UTCI data.
Args:
utci_data (Union[pd.Series, HourlyContinuousCollection]):
Expand Down Expand Up @@ -546,64 +543,14 @@ def month_time_binned_table(
if isinstance(utci_data, HourlyContinuousCollection):
utci_data = collection_to_series(utci_data)

# check that utci_data is annual hourly data
if len(utci_data) < 8760:
raise ValueError(
"utci_data must be hourly data over the course of at least a year (8760+ hours)."
)

# check for continuity of time periods, and overlaps overnight/year
flat_hours = [item for sublist in hour_bins for item in sublist]
flat_months = [item for sublist in month_bins for item in sublist]

if (max(flat_hours) != 23) or min(flat_hours) != 0:
raise ValueError("hour_bins hours must be in the range 0-23")
if (max(flat_months) != 12) or min(flat_months) != 1:
raise ValueError("month_bins hours must be in the range 1-12")
# check for duplicates
if len(set(flat_hours)) != len(flat_hours):
raise ValueError("hour_bins hours must not contain duplicates")
if len(set(flat_months)) != len(flat_months):
raise ValueError("month_bins hours must not contain duplicates")
if (set(flat_hours) != set(list(range(24)))) or (len(set(flat_hours)) != 24):
raise ValueError("Input hour_bins does not contain all hours of the day")
if (set(flat_months) != set(list(range(1, 13, 1)))) or (
len(set(flat_months)) != 12
):
raise ValueError("Input month_bins does not contain all months of the year")

# create index/column labels
if month_labels:
if len(month_labels) != len(month_bins):
raise ValueError("month_labels must be the same length as month_bins")
col_labels = month_labels
else:
col_labels = []
for months in month_bins:
if len(months) == 1:
col_labels.append(calendar.month_abbr[months[0]])
else:
col_labels.append(
f"{calendar.month_abbr[months[0]]} to {calendar.month_abbr[months[-1]]}"
)
if hour_labels:
if len(hour_labels) != len(hour_bins):
raise ValueError("time_labels must be the same length as hour_bins")
row_labels = hour_labels
else:
row_labels = [f"{i[0]:02d}:00 ≤ t < {i[-1] + 1:02d}:00" for i in hour_bins]

# create indexing bins
values = []
for months in month_bins:
month_mask = utci_data.index.month.isin(months)
inner_values = []
for hours in hour_bins:
mask = utci_data.index.hour.isin(hours) & month_mask
avg = utci_data.loc[mask].agg(agg)
inner_values.append(avg)
values.append(inner_values)
df = pd.DataFrame(values, index=col_labels, columns=row_labels).T
df = month_hour_binned_series(
series=utci_data,
month_bins=month_bins,
hour_bins=hour_bins,
month_labels=month_labels,
hour_labels=hour_labels,
agg=agg,
)

if color_result:
warnings.warn(
Expand Down
142 changes: 86 additions & 56 deletions LadybugTools_Engine/Python/src/ladybugtools_toolkit/helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import calendar
import contextlib
import copy
import io
Expand All @@ -8,7 +9,6 @@
import re
import urllib.request
import warnings
from calendar import month_abbr
from datetime import datetime, timedelta
from enum import Enum, auto
from pathlib import Path
Expand Down Expand Up @@ -47,7 +47,7 @@ def ZeroPadPercentFormatter(x: float) -> str:
return f"{x:5.0%}"


def default_time_analysis_periods() -> List[AnalysisPeriod]:
def default_hour_analysis_periods() -> List[AnalysisPeriod]:
"""A set of generic Analysis Period objects, spanning times of day."""
f = io.StringIO()
with contextlib.redirect_stdout(f):
Expand Down Expand Up @@ -80,7 +80,7 @@ def default_combined_analysis_periods() -> List[AnalysisPeriod]:
f = io.StringIO()
with contextlib.redirect_stdout(f):
aps = []
for ap_time in default_time_analysis_periods():
for ap_time in default_hour_analysis_periods():
for ap_month in default_month_analysis_periods():
aps.append(
AnalysisPeriod(
Expand All @@ -103,7 +103,7 @@ def default_analysis_periods() -> List[AnalysisPeriod]:
AnalysisPeriod(),
]
aps.extend(default_month_analysis_periods())
aps.extend(default_time_analysis_periods())
aps.extend(default_hour_analysis_periods())
aps.extend(default_combined_analysis_periods())

return aps
Expand Down Expand Up @@ -1619,26 +1619,26 @@ def remove_leap_days(
return pd_object[~mask]


def time_binned_dataframe(
def month_hour_binned_series(
series: pd.Series,
hour_bins: List[List[int]] = None,
month_bins: List[List[int]] = None,
hour_bin_labels: List[List[int]] = None,
month_bin_labels: List[List[int]] = None,
month_bins: Tuple[Tuple[int]] = None,
hour_bins: Tuple[Tuple[int]] = None,
month_labels: Tuple[str] = None,
hour_labels: Tuple[str] = None,
agg: str = "mean",
) -> pd.DataFrame:
"""Bin a series by hour and month.
Args:
series (pd.Series):
A series with a datetime index.
hour_bins (List[List[int]], optional):
A list of lists of hours to bin by. Defaults to None which bins into 24 discrete hours.
month_bins (List[List[int]], optional):
A list of lists of months to bin by. Defaults to None which bins into 12 discrete months.
hour_bin_labels (List[str], optional):
hour_bins (Tuple[Tuple[int]], optional):
A tuple of tuples of hours to bin by. Defaults to None, which bins into the periods given by default_hour_analysis_periods().
month_bins (Tuple[Tuple[int]], optional):
A tuple of tuples of months to bin by. Defaults to None, which bins into the periods given by default_month_analysis_periods().
hour_labels (Tuple[str], optional):
A list of labels to use for the hour bins. Defaults to None, which labels each bin as a time range from its first hour to the hour after its last.
month_bin_labels (List[str], optional):
month_labels (Tuple[str], optional):
A list of labels to use for the month bins. Defaults to None, which labels each bin with the abbreviations of its first and last months (or the single month's abbreviation for one-month bins).
agg (str, optional):
The aggregation method to use. Can be either "min", "mean", "median", "max" or "sum". Defaults to "mean".
Expand Down Expand Up @@ -1666,52 +1666,82 @@ def time_binned_dataframe(

# check that the series has at least 24-values per day
if series.groupby(series.index.day_of_year).count().min() < 24:
raise ValueError("The series must have at least 24-values per day")
raise ValueError("The series must contain at least 24-values per day")

# add name to series if no name found
if series.name is None:
series.name = "series"

# create generic bins if none are given by user
if hour_bins is None:
hour_bins = [[i] for i in range(24)]
# create generic month/hour sets for binning, from existing defaults if no input
if month_bins is None:
month_bins = [[i] for i in range(1, 13)]

# create generic bin labels if none are given by user
if hour_bin_labels is None:
hour_bin_labels = [", ".join([f"{j:02d}:00" for j in i]) for i in hour_bins]
if month_bin_labels is None:
month_bin_labels = [", ".join([month_abbr[j] for j in i]) for i in month_bins]
_months = np.arange(1, 13, 1)
month_bins = []
for ap in default_month_analysis_periods():
if ap.st_month == ap.end_month:
res = (ap.st_month,)
else:
length = ap.end_month - ap.st_month
res = tuple(np.roll(_months, -ap.st_month + 1)[: length + 1])
month_bins.append(res)
month_bins = tuple(month_bins)
if hour_bins is None:
_hours = np.arange(0, 24)
hour_bins = []
for ap in default_hour_analysis_periods():
if ap.st_hour == ap.end_hour:
res = (ap.st_hour,)
else:
length = ap.end_hour - ap.st_hour
res = tuple(np.roll(_hours, -ap.st_hour)[: length + 1])
hour_bins.append(res)
hour_bins = tuple(hour_bins)

# check for contiguity of time periods
flat_hours = [item for sublist in hour_bins for item in sublist]
flat_months = [item for sublist in month_bins for item in sublist]
if (max(flat_hours) != 23) or min(flat_hours) != 0:
raise ValueError("hour_bins hours must be in the range 0-23")
if (max(flat_months) != 12) or min(flat_months) != 1:
raise ValueError("month_bins hours must be in the range 1-12")
# check for duplicates
if len(set(flat_hours)) != len(flat_hours):
raise ValueError("hour_bins hours must not contain duplicates")
if len(set(flat_months)) != len(flat_months):
raise ValueError("month_bins hours must not contain duplicates")
if (set(flat_hours) != set(list(range(24)))) or (len(set(flat_hours)) != 24):
raise ValueError("Input hour_bins does not contain all hours of the day")
if (set(flat_months) != set(list(range(1, 13, 1)))) or (
len(set(flat_months)) != 12
):
raise ValueError("Input month_bins does not contain all months of the year")

# check that length of hour-bin-labels matches that of hour-bins
if len(hour_bin_labels) != len(hour_bins):
raise ValueError(
"Hour bin labels must be the same length as the number of hour bins."
)
if len(month_bin_labels) != len(month_bins):
raise ValueError(
"Month bin labels must be the same length as the number of month bins."
)
# create index/column labels
if month_labels:
if len(month_labels) != len(month_bins):
raise ValueError("month_labels must be the same length as month_bins")
col_labels = month_labels
else:
col_labels = []
for months in month_bins:
if len(months) == 1:
col_labels.append(calendar.month_abbr[months[0]])
else:
col_labels.append(
f"{calendar.month_abbr[months[0]]} to {calendar.month_abbr[months[-1]]}"
)
if hour_labels:
if len(hour_labels) != len(hour_bins):
raise ValueError("hour_labels must be the same length as hour_bins")
row_labels = hour_labels
else:
row_labels = [f"{i[0]:02d}:00 ≤ t < {i[-1] + 1:02d}:00" for i in hour_bins]

# check that hour and month bins are valid
if len(set([item for sublist in hour_bins for item in sublist])) != 24:
raise ValueError("Hour bins must contain all hours [0-23]")
if len(set([item for sublist in month_bins for item in sublist])) != 12:
raise ValueError("Month bins must contain all months [1-12]")

# convert series to dataframe with month/hour columns, and aggregate
df = series.to_frame()
df["hour"] = series.index.hour
df["month"] = series.index.month
a = []
# create indexing bins
values = []
for months in month_bins:
b = []
month_mask = series.index.month.isin(months)
inner_values = []
for hours in hour_bins:
b.append(
df[df.month.isin(months) & df.hour.isin(hours)][series.name].agg(agg)
)
a.append(b)
df = pd.DataFrame(a, index=month_bin_labels, columns=hour_bin_labels).T
mask = series.index.hour.isin(hours) & month_mask
aggregated = series.loc[mask].agg(agg)
inner_values.append(aggregated)
values.append(inner_values)
df = pd.DataFrame(values, index=col_labels, columns=row_labels).T

return df
Loading

0 comments on commit 1fad56c

Please sign in to comment.