diff --git a/analysis/config.py b/analysis/config.py index 6599178..452ee19 100644 --- a/analysis/config.py +++ b/analysis/config.py @@ -7,6 +7,11 @@ # study end date. should match date in project.yaml end_date = "2022-03-31" +# Vertical plot lines for financial year +# Leave an empty list if no lines needed +# If a date is out of range of the graph, it will not be visible +vertical_lines = ["2020-04-01", "2021-04-01"] + # Define a function for fun, I'm not really happy with this but it works # Definitely need some help improving this, need to learn how to write robust funs in py def calculate_date(date, months): diff --git a/analysis/rate_calculations.py b/analysis/rate_calculations.py index b7dc341..38e7d32 100644 --- a/analysis/rate_calculations.py +++ b/analysis/rate_calculations.py @@ -3,7 +3,7 @@ import pandas as pd import os from cohortextractor import Measure -from config import demographics, codelist_path, qof_measure_marker +from config import demographics, codelist_path, qof_measure_marker, vertical_lines from ebmdatalab import charts from study_definition import measures @@ -89,6 +89,7 @@ show_legend=True, ) + add_date_lines(bp002_decile_chart, vertical_lines) bp002_decile_chart.gcf().set_size_inches(15, 8) bp002_decile_chart.gca().set_yticklabels( ["{:.0f}%".format(x * 100) for x in bp002_decile_chart.gca().get_yticks()] @@ -126,6 +127,7 @@ column_to_plot="rate", category=None, y_label=None, + vlines=vertical_lines ) df_total.to_csv(os.path.join(OUTPUT_DIR, "rate_table_total.csv"), index=False) @@ -149,6 +151,7 @@ column_to_plot="rate", category=value.group_by[0], y_label=None, + vlines=vertical_lines ) df.to_csv( diff --git a/analysis/utilities.py b/analysis/utilities.py index a4da8ba..4932bc6 100644 --- a/analysis/utilities.py +++ b/analysis/utilities.py @@ -2,6 +2,7 @@ import numpy as np import matplotlib.pyplot as plt import matplotlib.dates as mdates +from dateutil import parser import os from pathlib import Path @@ -263,6 +264,21 @@ def 
def add_date_lines(plt, vlines):
    """Draw a dashed orange vertical marker on the plot for each date.

    Args:
        plt: Object exposing matplotlib's ``axvline`` method (the ``pyplot``
            module or an ``Axes``); the markers are drawn on it.
        vlines: Iterable of dates in any form ``pd.to_datetime`` accepts,
            e.g. ``"2020-04-01"``. ``None`` or an empty iterable draws
            nothing.

    Returns:
        None. Entries that cannot be parsed as a date are logged as a
        warning and skipped, so one bad entry does not abort the plot.
    """
    import logging  # local import so the block does not rely on file-level imports

    if vlines is None:
        return

    logger = logging.getLogger(__name__)

    for date in vlines:
        try:
            when = pd.to_datetime(date)
        except (ValueError, TypeError):
            # dateutil's ParserError and pandas' own date-parsing errors are
            # ValueError subclasses, so this catches them without reaching
            # into the private ``dateutil.parser._parser`` module.
            logger.warning("Skipping vertical line: cannot parse date %r", date)
            continue

        # pd.to_datetime passes None through and maps blank input to NaT;
        # neither is a drawable position.
        if when is None or when is pd.NaT:
            logger.warning("Skipping vertical line: empty date %r", date)
            continue

        # axvline spans the full height of the axes (ymin/ymax are axes
        # fractions), so the marker fits whatever y-scale the chart uses
        # instead of forcing a hard-coded 0-100 data range onto the y-axis.
        plt.axvline(x=when, color="orange", ls="--")