Skip to content

Commit

Permalink
Merge pull request #341 from opensafely-core/create-v4
Browse files Browse the repository at this point in the history
Create version 4.0
  • Loading branch information
alarthast authored Oct 8, 2024
2 parents e4fe728 + 2c5cb81 commit 840ea82
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 44 deletions.
19 changes: 13 additions & 6 deletions pipeline/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,28 @@
from types import SimpleNamespace


# The version of `project.yaml` where each feature was introduced
# The versions of `project.yaml` where each feature applies to
# Tuple of (version where the feature was introduced, version after which the feature is deprecated)
FEATURE_FLAGS_BY_VERSION = {
"UNIQUE_OUTPUT_PATH": 2,
"EXPECTATIONS_POPULATION": 3,
"UNIQUE_OUTPUT_PATH": (2, None),
"EXPECTATIONS_POPULATION": (3, 3),
"REMOVE_SUPPORT_FOR_COHORT_EXTRACTOR": (4, None),
}


LATEST_VERSION = max(FEATURE_FLAGS_BY_VERSION.values())
LATEST_VERSION = max([v[0] for v in FEATURE_FLAGS_BY_VERSION.values()])


def get_feature_flags_for_version(version: float) -> SimpleNamespace:
if version > LATEST_VERSION:
raise ValueError(f"The latest version is v{LATEST_VERSION}, but got v{version}")
feat = SimpleNamespace()

for k, v in FEATURE_FLAGS_BY_VERSION.items():
value = v <= version # is this feature turned on the requested version?
# is this feature turned on the requested version?
if v[1] is None:
value = v[0] <= version
else:
value = v[0] <= version <= v[1]

setattr(feat, k, value)

Expand Down
31 changes: 21 additions & 10 deletions pipeline/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
validate_databuilder_outputs,
validate_glob_pattern,
validate_no_kwargs,
validate_not_cohort_extractor_action,
validate_type,
)

Expand Down Expand Up @@ -238,7 +239,7 @@ def is_database_action(self) -> bool:
class Pipeline:
version: float
actions: dict[str, Action]
expectations: Expectations
expectations: Expectations | None

@classmethod
def build(
Expand All @@ -262,13 +263,18 @@ def build(
raise ValidationError(
f"`version` must be a number between 1 and {LATEST_VERSION}"
)
feat = get_feature_flags_for_version(version)

validate_type(actions, dict, "Project `actions` section")
actions = {
action_id: Action.build(action_id, **action_config)
for action_id, action_config in actions.items()
}

if feat.REMOVE_SUPPORT_FOR_COHORT_EXTRACTOR:
for config in actions.values():
validate_not_cohort_extractor_action(config)

seen: dict[Command, list[str]] = defaultdict(list)
for name, config in actions.items():
run = config.run
Expand All @@ -278,7 +284,7 @@ def build(
)
seen[run].append(name)

if get_feature_flags_for_version(version).UNIQUE_OUTPUT_PATH:
if feat.UNIQUE_OUTPUT_PATH:
# find duplicate paths defined in the outputs section
seen_files = []
for config in actions.values():
Expand All @@ -298,19 +304,24 @@ def build(
f"Action `{a.action_id}` references an unknown action in its `needs` list: {n}"
)

feat = get_feature_flags_for_version(version)
if feat.EXPECTATIONS_POPULATION:
if feat.REMOVE_SUPPORT_FOR_COHORT_EXTRACTOR:
if expectations is not None:
raise ValidationError(
"Project includes `expectations` section, which is not supported in this version"
)
elif feat.EXPECTATIONS_POPULATION:
if expectations is None:
raise ValidationError("Project must include `expectations` section")
else:
expectations = {"population_size": 1000}

validate_type(expectations, dict, "Project `expectations` section")
if "population_size" not in expectations:
raise ValidationError(
"Project `expectations` section must include `population_size` section",
)
expectations = Expectations.build(**expectations)
if expectations is not None:
validate_type(expectations, dict, "Project `expectations` section")
if "population_size" not in expectations:
raise ValidationError(
"Project `expectations` section must include `population_size` section",
)
expectations = Expectations.build(**expectations)

return cls(version, actions, expectations)

Expand Down
7 changes: 7 additions & 0 deletions pipeline/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,13 @@ def validate_glob_pattern(pattern: str, privacy_level: str) -> None:
raise InvalidPatternError("is an absolute path")


def validate_not_cohort_extractor_action(action: Action) -> None:
if action.run.parts[0].startswith("cohortextractor"):
raise ValidationError(
f"Action {action.action_id} uses cohortextractor actions, which are not supported in this version."
)


def validate_cohortextractor_outputs(action_id: str, action: Action) -> None:
"""
Check cohortextractor's output config is valid for this command
Expand Down
29 changes: 9 additions & 20 deletions tests/fixtures/valid_yaml/project.yaml
Original file line number Diff line number Diff line change
@@ -1,40 +1,29 @@
version: '3.0'

expectations:
population_size: 100
version: '4.0'

actions:
generate_cohort:
run: cohortextractor:latest generate_cohort
outputs:
highly_sensitive:
cohort: output/input.csv

generate_cohort_with_dummy_data:
# we provide --output-dir here to distinguish the action from the one above
run: cohortextractor:latest generate_cohort --output-dir output/extra
dummy_data_file: test-data/dummy-data.csv
generate_dataset:
run: ehrql:v1 generate-dataset analysis/dataset_definition.py --output output/dataset.csv.gz
outputs:
highly_sensitive:
cohort: output/extra/input.csv
dataset: output/dataset.csv.gz

prepare_data_m:
run: python:latest python analysis/filter_by_sex.py M output/input.csv male.csv
needs: [generate_cohort]
needs: [generate_dataset]
outputs:
highly_sensitive:
male_cohort: male*.csv

prepare_data_f:
run: python:latest python analysis/filter_by_sex.py F output/input.csv female.csv
needs: [generate_cohort]
needs: [generate_dataset]
outputs:
highly_sensitive:
female_cohort: female*.csv

prepare_data_with_quote_in_filename:
run: python:latest python analysis/filter_by_sex.py F output/input.csv "qu'ote.csv"
needs: [generate_cohort]
needs: [generate_dataset]
outputs:
highly_sensitive:
quote_cohort: "qu'ote*.csv"
Expand All @@ -52,14 +41,14 @@ actions:
run: minimal-action:v1.1.0 output/input.csv
config:
suffix: .backup
needs: [generate_cohort]
needs: [generate_dataset]
outputs:
highly_sensitive:
cohort: output/input.backup.csv

test_cancellation:
run: python:latest python analysis/filter_by_sex.py F output/input.csv somefile.csv
needs: [generate_cohort]
needs: [generate_dataset]
outputs:
highly_sensitive:
somefile: somefile.csv
19 changes: 19 additions & 0 deletions tests/test_features.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pytest

from pipeline.features import get_feature_flags_for_version


Expand All @@ -6,17 +8,34 @@ def test_get_feature_flags_for_version_with_v1():

assert not flags.UNIQUE_OUTPUT_PATH
assert not flags.EXPECTATIONS_POPULATION
assert not flags.REMOVE_SUPPORT_FOR_COHORT_EXTRACTOR


def test_get_feature_flags_for_version_with_v2():
flags = get_feature_flags_for_version(2)

assert flags.UNIQUE_OUTPUT_PATH
assert not flags.EXPECTATIONS_POPULATION
assert not flags.REMOVE_SUPPORT_FOR_COHORT_EXTRACTOR


def test_get_feature_flags_for_version_with_v3():
flags = get_feature_flags_for_version(3)

assert flags.UNIQUE_OUTPUT_PATH
assert flags.EXPECTATIONS_POPULATION
assert not flags.REMOVE_SUPPORT_FOR_COHORT_EXTRACTOR


def test_get_feature_flags_for_version_with_v4():
flags = get_feature_flags_for_version(4)

assert flags.UNIQUE_OUTPUT_PATH
assert not flags.EXPECTATIONS_POPULATION
assert flags.REMOVE_SUPPORT_FOR_COHORT_EXTRACTOR


def test_get_feature_flags_for_version_with_v5():
msg = "The latest version is v4, but got v5"
with pytest.raises(ValueError, match=msg):
get_feature_flags_for_version(5)
47 changes: 39 additions & 8 deletions tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,12 @@

def test_success():
data = {
"version": "3",
"expectations": {"population_size": 10},
"version": "4",
"actions": {
"action1": {
"run": "test:latest",
"outputs": {
"moderately_sensitive": {"cohort": "output.csv"},
"moderately_sensitive": {"dataset": "output.csv"},
},
},
},
Expand Down Expand Up @@ -130,6 +129,23 @@ def test_action_extraction_command_with_one_outputs():
assert len(outputs.values()) == 1


def test_cohortextractor_actions_not_used_after_v3():
data = {
"version": "4",
"actions": {
"generate_cohort": {
"run": "cohortextractor:latest generate_cohort",
"outputs": {
"highly_sensitive": {"cohort": "output/input.csv"},
},
},
},
}
msg = "uses cohortextractor actions, which are not supported in this version."
with pytest.raises(ValidationError, match=msg):
Pipeline.build(**data)


def test_command_properties():
data = {
"version": 1,
Expand Down Expand Up @@ -169,7 +185,23 @@ def test_expectations_before_v3_has_a_default_set():
assert config.expectations.population_size == 1000


def test_expectations_exists():
def test_expectations_does_not_exist_after_v3():
data = {
"version": 4,
"expectations": {},
"actions": {
"generate_dataset": {
"run": "ehrql:v1 generate-dataset args --output output/dataset.csv.gz",
"outputs": {"highly_sensitive": {"dataset": "output/dataset.csv.gz"}},
}
},
}
msg = "Project includes `expectations` section"
with pytest.raises(ValidationError, match=msg):
Pipeline.build(**data)


def test_expectations_exists_for_v3():
# our logic for this is custom so ensure it works as expected
data = {
"version": 3,
Expand All @@ -186,7 +218,7 @@ def test_expectations_exists():
Pipeline.build(**data)


def test_expectations_population_size_exists():
def test_expectations_population_size_exists_for_v3():
data = {
"version": 3,
"expectations": {},
Expand All @@ -203,7 +235,7 @@ def test_expectations_population_size_exists():
Pipeline.build(**data)


def test_expectations_population_size_is_a_number():
def test_expectations_population_size_is_a_number_for_v3():
data = {
"version": 3,
"expectations": {"population_size": "test"},
Expand All @@ -225,8 +257,7 @@ def test_pipeline_all_actions(test_file):
config = load_pipeline(test_file)

assert config.all_actions == [
"generate_cohort",
"generate_cohort_with_dummy_data",
"generate_dataset",
"prepare_data_m",
"prepare_data_f",
"prepare_data_with_quote_in_filename",
Expand Down

0 comments on commit 840ea82

Please sign in to comment.