diff --git a/README.md b/README.md index b2f72de..131f0b9 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,6 @@ deciles-charts generates a line chart for each [measure table][1] in an input directory. The line chart has time on the horizontal axis (`x`) and value on the vertical axis (`y`). Deciles are plotted as dashed lines; -outer percentiles are plotted as dotted lines; the median is plotted as a solid line. For example, the following deciles chart was generated from dummy data: @@ -68,6 +67,29 @@ For each measure table, there will now be a corresponding deciles chart. For example, given a measure table called `measure_has_sbp_event_by_stp_code.csv`, there will now be a corresponding deciles chart called `deciles_chart_has_sbp_event_by_stp_code.png`. +## Configuration + +Pass configuration via the `config` property. +For example: + +```yaml +generate_deciles_charts: + run: > + deciles-charts:[version] + --input-files output/measure_*.csv + --output_dir output + config: + show_outer_percentiles: true + needs: [generate_measures] + outputs: + moderately_sensitive: + deciles_charts: output/deciles_chart_*.png +``` + +| Configuration | Default | Description | +| ------------------------ | ------- | ----------------------------------------------------------- | +| `show_outer_percentiles` | `false` | Show the top and bottom percentiles, as well as the deciles | + ## Notes for developers Please see [_DEVELOPERS.md_](DEVELOPERS.md). diff --git a/analysis/deciles_charts.py b/analysis/deciles_charts.py index b219250..e056494 100644 --- a/analysis/deciles_charts.py +++ b/analysis/deciles_charts.py @@ -1,5 +1,6 @@ import argparse import glob +import json import logging import pathlib import re @@ -9,6 +10,10 @@ from ebmdatalab import charts +DEFAULT_CONFIG = { + "show_outer_percentiles": False, +} + MEASURE_FNAME_REGEX = re.compile(r"measure_(?P\w+)\.csv") # replicate cohort-extractor's logging @@ -51,8 +56,13 @@ def drop_zero_denominator_rows(measure_table): return measure_table[is_not_inf].reset_index(drop=True) -def get_deciles_chart(measure_table): - return charts.deciles_chart(measure_table, period_column="date", column="value") +def get_deciles_chart(measure_table, config): + return charts.deciles_chart( + measure_table, + period_column="date", + column="value", + show_outer_percentiles=config["show_outer_percentiles"], + ) def write_deciles_chart(deciles_chart, path): @@ -67,6 +77,17 @@ def match_paths(pattern): return [get_path(x) for x in glob.glob(pattern)] +def parse_config(config_json): + user_config = json.loads(config_json) + config = DEFAULT_CONFIG.copy() + if invalid_keys := [k for k in user_config if k not in config]: + invalid_keys_str = ", ".join(invalid_keys) + raise RuntimeError(f"Invalid configuration: {invalid_keys_str}") + + config.update(user_config) + return config + + def parse_args(): parser = argparse.ArgumentParser() parser.add_argument( @@ -81,6 +102,12 @@ def parse_args(): type=get_path, help="Path to the output directory", ) + parser.add_argument( + "--config", + default=DEFAULT_CONFIG.copy(), + type=parse_config, + help="JSON-encoded configuration", + ) return parser.parse_args() @@ -88,10 +115,11 @@ def main(): args = parse_args() input_files = args.input_files output_dir = args.output_dir + config = args.config for measure_table in get_measure_tables(input_files): measure_table = drop_zero_denominator_rows(measure_table) - chart = get_deciles_chart(measure_table) + chart = get_deciles_chart(measure_table, config) id_ = measure_table.attrs["id"] fname = f"deciles_chart_{id_}.png" write_deciles_chart(chart, output_dir / fname) diff --git a/img/deciles_chart_has_sbp_event_by_stp_code.png b/img/deciles_chart_has_sbp_event_by_stp_code.png index 4b6bc91..9277cd0 100644 Binary files a/img/deciles_chart_has_sbp_event_by_stp_code.png and b/img/deciles_chart_has_sbp_event_by_stp_code.png differ diff --git a/tests/test_deciles_charts.py b/tests/test_deciles_charts.py index 9b6b0ee..74d0c8f 100644 --- a/tests/test_deciles_charts.py +++ b/tests/test_deciles_charts.py @@ -1,5 +1,6 @@ import numpy import pandas +import pytest from pandas import testing from analysis import deciles_charts @@ -75,7 +76,25 @@ def test_drop_zero_denominator_rows(): assert obs_measure_table.attrs is not measure_table.attrs # test it's a copy -def test_parse_args(tmp_path, monkeypatch): +def test_parse_config(): + with pytest.raises(RuntimeError, match=r"bad_key, worse_key$"): + deciles_charts.parse_config('{"bad_key": "", "worse_key": ""}') + + +@pytest.mark.parametrize( + "optional_arg,parsed_optional_arg", + [ + ( + (), + ("config", {"show_outer_percentiles": False}), # default + ), + ( + ("--config", '{"show_outer_percentiles": true}'), + ("config", {"show_outer_percentiles": True}), + ), + ], +) +def test_parse_args(tmp_path, monkeypatch, optional_arg, parsed_optional_arg): # arrange monkeypatch.chdir(tmp_path) monkeypatch.setattr( @@ -86,7 +105,8 @@ def test_parse_args(tmp_path, monkeypatch): "input/measure_*.csv", "--output-dir", "output", - ], + ] + + list(optional_arg), ) input_dir = tmp_path / "input" @@ -115,3 +135,4 @@ def test_parse_args(tmp_path, monkeypatch): # assert assert sorted(args.input_files) == sorted(input_files) assert args.output_dir == output_dir + assert getattr(args, parsed_optional_arg[0]) == parsed_optional_arg[1]