From 83a4b969b7fe5388b143cad5e014ea4424f44a45 Mon Sep 17 00:00:00 2001
From: Becky Smith <becky.smith@thedatalab.org>
Date: Fri, 18 Aug 2023 14:05:12 +0100
Subject: [PATCH 1/7] Add a T1OO table to the test tpp schema

---
 tests/tpp_backend_setup.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tests/tpp_backend_setup.py b/tests/tpp_backend_setup.py
index 1ef713ad..ca342aac 100644
--- a/tests/tpp_backend_setup.py
+++ b/tests/tpp_backend_setup.py
@@ -1055,3 +1055,11 @@ class VmpMapping(Base):
 
     id = Column(String(collation="Latin1_General_CI_AS"))
     prev_id = Column(String(collation="Latin1_General_CI_AS"))
+
+
+class T1OO(Base):
+    __tablename__ = "T1OO"
+    # fake pk to satisfy the ORM
+    # Patient_ID might be the primary key, TBC
+    pk = Column(Integer, primary_key=True)
+    Patient_ID = Column(types.BIGINT)

From 02725df236158ed99f339386f11623dc787ffaa9 Mon Sep 17 00:00:00 2001
From: Becky Smith <becky.smith@thedatalab.org>
Date: Fri, 18 Aug 2023 15:27:58 +0100
Subject: [PATCH 2/7] Read the OPENSAFELY_INCLUDE_T1OO flag in StudyDefinition
 and pass to backends

---
 cohortextractor/emis_backend.py     | 1 +
 cohortextractor/study_definition.py | 6 ++++++
 cohortextractor/tpp_backend.py      | 8 ++++++--
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/cohortextractor/emis_backend.py b/cohortextractor/emis_backend.py
index a8b236fe..4716a00c 100644
--- a/cohortextractor/emis_backend.py
+++ b/cohortextractor/emis_backend.py
@@ -49,6 +49,7 @@ def __init__(
         covariate_definitions,
         temporary_database=None,
         dummy_data=False,  # Ignored
+        include_t1oo=False,  # Ignored
     ):
         self.database_url = database_url
         self.covariate_definitions = covariate_definitions
diff --git a/cohortextractor/study_definition.py b/cohortextractor/study_definition.py
index 85d9d542..403f837b 100644
--- a/cohortextractor/study_definition.py
+++ b/cohortextractor/study_definition.py
@@ -36,6 +36,11 @@ def __init__(
         self._original_default_expectations = default_expectations or {}
         self.set_index_date(index_date)
         self.pandas_csv_args = self.get_pandas_csv_args(self.covariate_definitions)
+
+        self.include_t1oo = (
+            os.environ.get("OPENSAFELY_INCLUDE_T1OO", "").lower().strip() == "true"
+        )
+
         self.database_url = os.environ.get("DATABASE_URL")
         self.temporary_database = os.environ.get("TEMP_DATABASE_NAME")
         if self.database_url:
@@ -200,6 +205,7 @@ def create_backend(self, database_url=None, dummy_data=False):
             self.covariate_definitions,
             temporary_database=self.temporary_database,
             dummy_data=dummy_data,
+            include_t1oo=self.include_t1oo,
         )
 
     def recreate_backend(self):
diff --git a/cohortextractor/tpp_backend.py b/cohortextractor/tpp_backend.py
index 851789b0..b7fbd994 100644
--- a/cohortextractor/tpp_backend.py
+++ b/cohortextractor/tpp_backend.py
@@ -47,6 +47,7 @@ def __init__(
         covariate_definitions,
         temporary_database=None,
         dummy_data=False,
+        include_t1oo=False,
     ):
         self.database_url = database_url
         self.covariate_definitions = covariate_definitions
@@ -55,8 +56,11 @@ def __init__(
         self.next_temp_table_id = 1
         self._therapeutics_table_name = None
         self.truncate_sql_logs = False
+
         if self.covariate_definitions:
-            self.queries = self.get_queries(self.covariate_definitions)
+            self.queries = self.get_queries(
+                self.covariate_definitions, include_t1oo=include_t1oo
+            )
         else:
             self.queries = []
 
@@ -307,7 +311,7 @@ def get_therapeutic_risk_groups(self):
                 therapeutics_risk_group_variables.add(name)
         return therapeutics_risk_group_variables
 
-    def get_queries(self, covariate_definitions):
+    def get_queries(self, covariate_definitions, include_t1oo=False):
         output_columns = {}
         table_queries = {}
         for name, (query_type, query_args) in covariate_definitions.items():

From e3ca61625e053da562b37bbb427e9aef17644516 Mon Sep 17 00:00:00 2001
From: Becky Smith <becky.smith@thedatalab.org>
Date: Fri, 18 Aug 2023 15:29:17 +0100
Subject: [PATCH 3/7] feat: Exclude T1OO patients by default in TPP backend

---
 cohortextractor/tpp_backend.py | 22 ++++++++++++++-
 tests/conftest.py              | 12 +++++++++
 tests/test_tpp_backend.py      | 49 ++++++++++++++++++++++++++++++++++
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/cohortextractor/tpp_backend.py b/cohortextractor/tpp_backend.py
index b7fbd994..4abba0e3 100644
--- a/cohortextractor/tpp_backend.py
+++ b/cohortextractor/tpp_backend.py
@@ -387,7 +387,27 @@ def get_queries(self, covariate_definitions, include_t1oo=False):
             for name in table_queries
             if name != "population"
         ]
+
+        wheres = [f'{output_columns["population"]} = 1']
+
+        def get_t1oo_exclude_expressions():
+            # If this query has been explictly flagged as including T1OO patients then
+            # return unmodified
+            if include_t1oo:
+                return [], []
+            # Otherwise we add an extra LEFT OUTER JOIN on the T1OO table and
+            # WHERE clause which will exclude any patient IDs found in the T1OO table
+            return (
+                [f"LEFT OUTER JOIN T1OO ON T1OO.Patient_ID = {patient_id_expr}"],
+                ["T1OO.Patient_ID IS null"],
+            )
+
+        t100_join, t1oo_where = get_t1oo_exclude_expressions()
+        joins.extend(t100_join)
         joins_str = "\n          ".join(joins)
+        wheres.extend(t1oo_where)
+        where_str = " AND ".join(wheres)
+
         joined_output_query = f"""
         -- Join all columns for final output
         SELECT
@@ -395,7 +415,7 @@ def get_queries(self, covariate_definitions, include_t1oo=False):
         FROM
           {primary_table}
           {joins_str}
-        WHERE {output_columns["population"]} = 1
+        WHERE {where_str}
         """
         all_queries = []
         for sql_list in table_queries.values():
diff --git a/tests/conftest.py b/tests/conftest.py
index 31d81033..0b9974c6 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,6 +1,8 @@
 import os
 from pathlib import Path
 
+import pytest
+
 
 def pytest_generate_tests(metafunc):
     """Source the test environment"""
@@ -9,3 +11,13 @@ def pytest_generate_tests(metafunc):
         k = k.strip()
         if not os.environ.get(k):
             os.environ[k] = v.strip()
+
+
+@pytest.fixture()
+def include_t1oo():
+    # This flag should only be passed by job-runner, if it's
+    # been set in tests, something is wrong
+    assert "OPENSAFELY_INCLUDE_T1OO" not in os.environ
+    os.environ["OPENSAFELY_INCLUDE_T1OO"] = "True"
+    yield
+    del os.environ["OPENSAFELY_INCLUDE_T1OO"]
diff --git a/tests/test_tpp_backend.py b/tests/test_tpp_backend.py
index 482b22cd..0b944bf7 100644
--- a/tests/test_tpp_backend.py
+++ b/tests/test_tpp_backend.py
@@ -30,6 +30,7 @@
     EC,
     ICNARC,
     OPA,
+    T1OO,
     UKRR,
     APCS_Der,
     Appointment,
@@ -128,6 +129,7 @@ def setup_function(function):
     session.query(UKRR).delete()
     session.query(Patient).delete()
     session.query(BuildProgress).delete()
+    session.query(T1OO).delete()
 
     session.commit()
 
@@ -181,6 +183,53 @@ def test_minimal_study_with_reserved_keywords():
     assert_results(study.to_dicts(), all=["M", "F"], asc=["40", "55"])
 
 
+def test_minimal_study_with_t1oo_default():
+    # Test that type 1 opt-outs are excluded by default
+    assert "OPENSAFELY_INCLUDE_T1OO" not in os.environ
+    session = make_session()
+    patient_1 = Patient(Patient_ID=1, DateOfBirth="1980-01-01", Sex="M")
+    patient_2 = Patient(Patient_ID=2, DateOfBirth="1965-01-01", Sex="F")
+    t1oo_1 = T1OO(Patient_ID=1)
+    session.add_all([patient_1, patient_2, t1oo_1])
+    session.commit()
+    study = StudyDefinition(
+        population=patients.all(),
+        all=patients.sex(),
+        asc=patients.age_as_of("2020-01-01"),
+    )
+    # patient_1 (M, age 40) excluded
+    assert_results(study.to_dicts(), all=["F"], asc=["55"])
+
+
+@pytest.mark.parametrize(
+    "flag,expected",
+    [
+        ("", ["1", "4"]),
+        ("False", ["1", "4"]),
+        ("false", ["1", "4"]),
+        ("1", ["1", "4"]),
+        ("True", ["1", "2", "3", "4"]),
+        ("true", ["1", "2", "3", "4"]),
+    ],
+)
+def test_minimal_study_with_t1oo_flag(flag, expected, include_t1oo):
+    os.environ["OPENSAFELY_INCLUDE_T1OO"] = flag
+    # Test that type 1 opt-outs are only included if flag is explicitly set to "True"
+    session = make_session()
+    patient_1 = Patient(Patient_ID=1, DateOfBirth="1980-01-01", Sex="M")
+    patient_2 = Patient(Patient_ID=2, DateOfBirth="1965-01-01", Sex="F")
+    patient_3 = Patient(Patient_ID=3, DateOfBirth="1975-01-01", Sex="F")
+    patient_4 = Patient(Patient_ID=4, DateOfBirth="1985-01-01", Sex="F")
+    t1oo_2 = T1OO(Patient_ID=2)
+    t1oo_3 = T1OO(Patient_ID=3)
+    session.add_all([patient_1, patient_2, patient_3, patient_4, t1oo_2, t1oo_3])
+    session.commit()
+    study = StudyDefinition(
+        population=patients.all(),
+    )
+    assert_results(study.to_dicts(), patient_id=expected)
+
+
 @pytest.mark.parametrize("format", ["csv", "csv.gz", "feather", "dta", "dta.gz"])
 def test_study_to_file_with_therapeutic_risk_groups(tmp_path, format):
     session = make_session()

From 3fdd507a15b6f5c338c4a1342a9a099942948b80 Mon Sep 17 00:00:00 2001
From: Becky Smith <becky.smith@thedatalab.org>
Date: Wed, 23 Aug 2023 12:50:36 +0100
Subject: [PATCH 4/7] Update logging tests

---
 tests/test_logging.py | 68 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 59 insertions(+), 9 deletions(-)

diff --git a/tests/test_logging.py b/tests/test_logging.py
index cc06496a..d8b3a0cf 100644
--- a/tests/test_logging.py
+++ b/tests/test_logging.py
@@ -107,10 +107,11 @@ def test_study_definition_initial_stats_logging(logger):
     )
     assert get_stats_logs(logger.entries) == [
         # output columns include patient_id, and the 4 variables defined in the
-        # study defniiton, including event_date_2, which is defined as a parameter to
-        # event_min_date
+        # study definition - population, event_date_1, event_min_date and event_date_2,
+        # which is defined as a parameter to event_min_date
         # tables - Patient, temp event table for each codelist
-        {"output_column_count": 5, "table_count": 3, "table_joins_count": 2},
+        # joins - Patient to each event table, and t1oo
+        {"output_column_count": 5, "table_count": 3, "table_joins_count": 3},
         # variable_count is a count of the top-level variables defined in the study def (i.e. not event_date_2)
         {"variable_count": 4},
         # 2 variables use a codelist (event_date_1, and the nested event_date_2)
@@ -134,10 +135,10 @@ def test_stats_logging_tpp_backend(logger):
 
     # initial stats
     expected_initial_study_def_logs = [
-        # output columns include patient_id, and the 2 variables defined in the
-        # study defniiton
-        # tables - Patient, temp event table for codelist
-        {"output_column_count": 3, "table_count": 2, "table_joins_count": 1},
+        # output columns include patient_id, and the 2 variables (population, event)
+        # defined in the study defniiton
+        # tables - Patient, temp event table, t1oo for codelist
+        {"output_column_count": 3, "table_count": 2, "table_joins_count": 2},
         {"variable_count": 2},
         {"variables_using_codelist_count": 1},
         {"variable_using_codelist": "event", "codelist_size": 1},
@@ -195,6 +196,55 @@ def test_stats_logging_tpp_backend(logger):
     )
 
 
+def test_stats_logging_tpp_backend_with_t1oo(logger, include_t1oo):
+    # The query counter is a global at the module level, so it isn't reset between tests
+    # Find the next position (without incrementing it); this is the start of the test's timing logs
+    start_counter = timing_log_counter.next
+
+    study = StudyDefinition(
+        population=patients.all(),
+    )
+    study.to_dicts()
+
+    # initial stats
+    expected_initial_study_def_logs = [
+        # output columns include patient_id and population
+        # tables - Patient table only
+        # no joins because t1oo are included, so there is no need to join on the
+        # t1oo table to exclude them
+        {"output_column_count": 2, "table_count": 1, "table_joins_count": 0},
+        {"variable_count": 1},
+        {"variables_using_codelist_count": 0},
+    ]
+    # timing stats
+    # logs in tpp_backend during query execution
+
+    expected_timing_log_params = [
+        *_sql_execute_timing_logs(
+            description="Query for population",
+            sql="SELECT * INTO #population",
+            timing_id=start_counter,
+        ),
+        *_sql_execute_timing_logs(
+            description=None,
+            sql="CREATE CLUSTERED INDEX patient_id_ix ON #population (patient_id)",
+            timing_id=start_counter + 1,
+            is_truncated=False,
+        ),
+        *_sql_execute_timing_logs(
+            description="Join all columns for final output",
+            sql="#population.patient_id AS [patient_id]",
+            timing_id=start_counter + 2,
+        ),
+    ]
+    assert_stats_logs(
+        logger,
+        expected_initial_study_def_logs,
+        expected_timing_log_params,
+        downloaded=False,
+    )
+
+
 @patch("cohortextractor.cohortextractor.preflight_generation_check")
 @patch(
     "cohortextractor.cohortextractor.list_study_definitions",
@@ -239,7 +289,7 @@ def test_stats_logging_generate_cohort(
     expected_initial_study_def_logs = [
         # these 3 are logged from StudyDefinition instantiation
         # patient_id, population, sex - all from patient table, but we make one temp # table per variable
-        {"output_column_count": 3, "table_count": 2, "table_joins_count": 1},
+        {"output_column_count": 3, "table_count": 2, "table_joins_count": 2},
         {"variable_count": 2},  # population, sex
         {"variables_using_codelist_count": 0},
         # index_date_count logged from generate_cohort
@@ -391,7 +441,7 @@ def test_stats_logging_generate_cohort_with_index_dates(
         {"index_date_count": 3},
         {"min_index_date": "2020-01-01", "max_index_date": "2020-03-01"},
         # output_column/table/joins_count is logged in tpp_backend on backend instantiation so it's repeated for each index date
-        *[{"output_column_count": 3, "table_count": 2, "table_joins_count": 1}] * 4,
+        *[{"output_column_count": 3, "table_count": 2, "table_joins_count": 2}] * 4,
         *[
             {"resetting_backend_index_date": ix_date}
             for ix_date in expected_index_dates

From 38c0bba93e2614000a352bf61349b58d4d267464 Mon Sep 17 00:00:00 2001
From: Becky Smith <becky.smith@thedatalab.org>
Date: Wed, 13 Sep 2023 12:29:44 +0100
Subject: [PATCH 5/7] Use dsn parameter to indicate t1oo inclusion

---
 cohortextractor/emis_backend.py     |  1 -
 cohortextractor/study_definition.py |  5 ---
 cohortextractor/tpp_backend.py      | 36 ++++++++++++---
 tests/conftest.py                   | 12 -----
 tests/test_logging.py               | 12 ++++-
 tests/test_tpp_backend.py           | 70 +++++++++++++++++++++++++++--
 6 files changed, 107 insertions(+), 29 deletions(-)

diff --git a/cohortextractor/emis_backend.py b/cohortextractor/emis_backend.py
index 4716a00c..a8b236fe 100644
--- a/cohortextractor/emis_backend.py
+++ b/cohortextractor/emis_backend.py
@@ -49,7 +49,6 @@ def __init__(
         covariate_definitions,
         temporary_database=None,
         dummy_data=False,  # Ignored
-        include_t1oo=False,  # Ignored
     ):
         self.database_url = database_url
         self.covariate_definitions = covariate_definitions
diff --git a/cohortextractor/study_definition.py b/cohortextractor/study_definition.py
index 403f837b..c4bc2a25 100644
--- a/cohortextractor/study_definition.py
+++ b/cohortextractor/study_definition.py
@@ -37,10 +37,6 @@ def __init__(
         self.set_index_date(index_date)
         self.pandas_csv_args = self.get_pandas_csv_args(self.covariate_definitions)
 
-        self.include_t1oo = (
-            os.environ.get("OPENSAFELY_INCLUDE_T1OO", "").lower().strip() == "true"
-        )
-
         self.database_url = os.environ.get("DATABASE_URL")
         self.temporary_database = os.environ.get("TEMP_DATABASE_NAME")
         if self.database_url:
@@ -205,7 +201,6 @@ def create_backend(self, database_url=None, dummy_data=False):
             self.covariate_definitions,
             temporary_database=self.temporary_database,
             dummy_data=dummy_data,
-            include_t1oo=self.include_t1oo,
         )
 
     def recreate_backend(self):
diff --git a/cohortextractor/tpp_backend.py b/cohortextractor/tpp_backend.py
index 4abba0e3..b896d747 100644
--- a/cohortextractor/tpp_backend.py
+++ b/cohortextractor/tpp_backend.py
@@ -6,6 +6,7 @@
 import re
 import uuid
 from functools import cached_property
+from urllib import parse
 
 import pandas
 import structlog
@@ -40,6 +41,7 @@
 class TPPBackend:
     _db_connection = None
     _current_column_name = None
+    include_t1oo = False
 
     def __init__(
         self,
@@ -47,9 +49,13 @@ def __init__(
         covariate_definitions,
         temporary_database=None,
         dummy_data=False,
-        include_t1oo=False,
     ):
-        self.database_url = database_url
+        if database_url is not None:
+            # set self.include_t1oo from the database url
+            self.database_url = self.modify_dsn(database_url)
+        else:
+            self.database_url = database_url
+
         self.covariate_definitions = covariate_definitions
         self.temporary_database = temporary_database
         self.dummy_data = dummy_data
@@ -58,12 +64,28 @@ def __init__(
         self.truncate_sql_logs = False
 
         if self.covariate_definitions:
-            self.queries = self.get_queries(
-                self.covariate_definitions, include_t1oo=include_t1oo
-            )
+            self.queries = self.get_queries(self.covariate_definitions)
         else:
             self.queries = []
 
+    def modify_dsn(self, dsn):
+        """
+        Removes the `opensafely_include_t1oo` parameter if present and uses it to set
+        the `include_t1oo` attribute accordingly
+        """
+        parts = parse.urlparse(dsn)
+        params = parse.parse_qs(parts.query, keep_blank_values=True)
+        include_t1oo_values = params.pop("opensafely_include_t1oo", [])
+        if len(include_t1oo_values) == 1:
+            self.include_t1oo = include_t1oo_values[0].lower() == "true"
+        elif len(include_t1oo_values) != 0:
+            raise ValueError(
+                "`opensafely_include_t1oo` parameter must not be supplied more than once"
+            )
+        new_query = parse.urlencode(params, doseq=True)
+        new_parts = parts._replace(query=new_query)
+        return parse.urlunparse(new_parts)
+
     def to_file(self, filename):
         queries = list(self.queries)
         # If we have a temporary database available we write results to a table
@@ -311,7 +333,7 @@ def get_therapeutic_risk_groups(self):
                 therapeutics_risk_group_variables.add(name)
         return therapeutics_risk_group_variables
 
-    def get_queries(self, covariate_definitions, include_t1oo=False):
+    def get_queries(self, covariate_definitions):
         output_columns = {}
         table_queries = {}
         for name, (query_type, query_args) in covariate_definitions.items():
@@ -393,7 +415,7 @@ def get_queries(self, covariate_definitions, include_t1oo=False):
         def get_t1oo_exclude_expressions():
             # If this query has been explictly flagged as including T1OO patients then
             # return unmodified
-            if include_t1oo:
+            if self.include_t1oo:
                 return [], []
             # Otherwise we add an extra LEFT OUTER JOIN on the T1OO table and
             # WHERE clause which will exclude any patient IDs found in the T1OO table
diff --git a/tests/conftest.py b/tests/conftest.py
index 0b9974c6..31d81033 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,8 +1,6 @@
 import os
 from pathlib import Path
 
-import pytest
-
 
 def pytest_generate_tests(metafunc):
     """Source the test environment"""
@@ -11,13 +9,3 @@ def pytest_generate_tests(metafunc):
         k = k.strip()
         if not os.environ.get(k):
             os.environ[k] = v.strip()
-
-
-@pytest.fixture()
-def include_t1oo():
-    # This flag should only be passed by job-runner, if it's
-    # been set in tests, something is wrong
-    assert "OPENSAFELY_INCLUDE_T1OO" not in os.environ
-    os.environ["OPENSAFELY_INCLUDE_T1OO"] = "True"
-    yield
-    del os.environ["OPENSAFELY_INCLUDE_T1OO"]
diff --git a/tests/test_logging.py b/tests/test_logging.py
index d8b3a0cf..ac307dc6 100644
--- a/tests/test_logging.py
+++ b/tests/test_logging.py
@@ -1,3 +1,4 @@
+import os
 import re
 from collections import Counter
 from unittest.mock import patch
@@ -16,6 +17,15 @@
 )
 
 
+@pytest.fixture
+def set_database_url_with_t1oo(monkeypatch):
+    if "TPP_DATABASE_URL" in os.environ:
+        monkeypatch.setenv(
+            "DATABASE_URL",
+            f'{os.environ["TPP_DATABASE_URL"]}?opensafely_include_t1oo=True',
+        )
+
+
 @pytest.fixture(name="logger")
 def fixture_logger():
     """Modify `capture_logs` to keep reference to `processors` list intact,
@@ -196,7 +206,7 @@ def test_stats_logging_tpp_backend(logger):
     )
 
 
-def test_stats_logging_tpp_backend_with_t1oo(logger, include_t1oo):
+def test_stats_logging_tpp_backend_with_t1oo(logger, set_database_url_with_t1oo):
     # The query counter is a global at the module level, so it isn't reset between tests
     # Find the next position (without incrementing it); this is the start of the test's timing logs
     start_counter = timing_log_counter.next
diff --git a/tests/test_tpp_backend.py b/tests/test_tpp_backend.py
index 0b944bf7..dc8baa3f 100644
--- a/tests/test_tpp_backend.py
+++ b/tests/test_tpp_backend.py
@@ -80,6 +80,18 @@ def set_database_url(monkeypatch):
         monkeypatch.setenv("DATABASE_URL", os.environ["TPP_DATABASE_URL"])
 
 
+@pytest.fixture
+def set_database_url_with_t1oo(monkeypatch):
+    def set_db_url(t1oo_value):
+        if "TPP_DATABASE_URL" in os.environ:
+            monkeypatch.setenv(
+                "DATABASE_URL",
+                f'{os.environ["TPP_DATABASE_URL"]}?opensafely_include_t1oo={t1oo_value}',
+            )
+
+    return set_db_url
+
+
 def setup_module(module):
     make_database()
 
@@ -134,6 +146,59 @@ def setup_function(function):
     session.commit()
 
 
+@pytest.mark.parametrize(
+    "dsn_in,dsn_out,t1oo_status",
+    [
+        (
+            "mssql://user:pass@localhost:4321/db",
+            "mssql://user:pass@localhost:4321/db",
+            False,
+        ),
+        (
+            "mssql://user:pass@localhost:4321/db?param1=one&param2&param1=three",
+            "mssql://user:pass@localhost:4321/db?param1=one&param1=three&param2=",
+            False,
+        ),
+        (
+            "mssql://user:pass@localhost:4321/db?opensafely_include_t1oo&param2=two",
+            "mssql://user:pass@localhost:4321/db?param2=two",
+            False,
+        ),
+        (
+            "mssql://user:pass@localhost:4321/db?opensafely_include_t1oo=false",
+            "mssql://user:pass@localhost:4321/db",
+            False,
+        ),
+        (
+            "mssql://user:pass@localhost:4321/db?opensafely_include_t1oo=true",
+            "mssql://user:pass@localhost:4321/db",
+            True,
+        ),
+        (
+            "mssql://user:pass@localhost:4321/db?opensafely_include_t1oo=True",
+            "mssql://user:pass@localhost:4321/db",
+            True,
+        ),
+    ],
+)
+def test_tpp_backend_modify_dsn(dsn_in, dsn_out, t1oo_status):
+    backend = TPPBackend(database_url=dsn_in, covariate_definitions=None)
+    assert backend.database_url == dsn_out
+    assert backend.include_t1oo == t1oo_status
+
+
+@pytest.mark.parametrize(
+    "dsn",
+    [
+        "mssql://user:pass@localhost:4321/db?opensafely_include_t1oo=false&opensafely_include_t1oo=false",
+        "mssql://user:pass@localhost:4321/db?opensafely_include_t1oo=false&opensafely_include_t1oo",
+    ],
+)
+def test_tpp_backend_modify_dsn_rejects_duplicate_params(dsn):
+    with pytest.raises(ValueError, match="must not be supplied more than once"):
+        TPPBackend(database_url=dsn, covariate_definitions=None)
+
+
 @pytest.mark.parametrize("format", ["csv", "csv.gz", "feather", "dta", "dta.gz"])
 def test_minimal_study_to_file(tmp_path, format):
     session = make_session()
@@ -185,7 +250,6 @@ def test_minimal_study_with_reserved_keywords():
 
 def test_minimal_study_with_t1oo_default():
     # Test that type 1 opt-outs are excluded by default
-    assert "OPENSAFELY_INCLUDE_T1OO" not in os.environ
     session = make_session()
     patient_1 = Patient(Patient_ID=1, DateOfBirth="1980-01-01", Sex="M")
     patient_2 = Patient(Patient_ID=2, DateOfBirth="1965-01-01", Sex="F")
@@ -212,8 +276,8 @@ def test_minimal_study_with_t1oo_default():
         ("true", ["1", "2", "3", "4"]),
     ],
 )
-def test_minimal_study_with_t1oo_flag(flag, expected, include_t1oo):
-    os.environ["OPENSAFELY_INCLUDE_T1OO"] = flag
+def test_minimal_study_with_t1oo_flag(set_database_url_with_t1oo, flag, expected):
+    set_database_url_with_t1oo(flag)
     # Test that type 1 opt-outs are only included if flag is explicitly set to "True"
     session = make_session()
     patient_1 = Patient(Patient_ID=1, DateOfBirth="1980-01-01", Sex="M")

From b5ddcf56802643811cdd587cbb058efb58a9e2a3 Mon Sep 17 00:00:00 2001
From: David Evans <dave.evans@thedatalab.org>
Date: Wed, 13 Sep 2023 20:25:28 +0100
Subject: [PATCH 6/7] Temporarily change default

This involves reverting some of the test changes. These are all
re-reverted back again in a follow-up PR.
---
 cohortextractor/tpp_backend.py |  3 ++-
 tests/test_logging.py          | 19 +++++++++----------
 tests/test_tpp_backend.py      | 21 ++-------------------
 3 files changed, 13 insertions(+), 30 deletions(-)

diff --git a/cohortextractor/tpp_backend.py b/cohortextractor/tpp_backend.py
index b896d747..62c61239 100644
--- a/cohortextractor/tpp_backend.py
+++ b/cohortextractor/tpp_backend.py
@@ -41,7 +41,8 @@
 class TPPBackend:
     _db_connection = None
     _current_column_name = None
-    include_t1oo = False
+    # TODO: Temporary default to support safe deployment
+    include_t1oo = True
 
     def __init__(
         self,
diff --git a/tests/test_logging.py b/tests/test_logging.py
index ac307dc6..f55d6676 100644
--- a/tests/test_logging.py
+++ b/tests/test_logging.py
@@ -117,11 +117,10 @@ def test_study_definition_initial_stats_logging(logger):
     )
     assert get_stats_logs(logger.entries) == [
         # output columns include patient_id, and the 4 variables defined in the
-        # study definition - population, event_date_1, event_min_date and event_date_2,
-        # which is defined as a parameter to event_min_date
+        # study defniiton, including event_date_2, which is defined as a parameter to
+        # event_min_date
         # tables - Patient, temp event table for each codelist
-        # joins - Patient to each event table, and t1oo
-        {"output_column_count": 5, "table_count": 3, "table_joins_count": 3},
+        {"output_column_count": 5, "table_count": 3, "table_joins_count": 2},
         # variable_count is a count of the top-level variables defined in the study def (i.e. not event_date_2)
         {"variable_count": 4},
         # 2 variables use a codelist (event_date_1, and the nested event_date_2)
@@ -145,10 +144,10 @@ def test_stats_logging_tpp_backend(logger):
 
     # initial stats
     expected_initial_study_def_logs = [
-        # output columns include patient_id, and the 2 variables (population, event)
-        # defined in the study defniiton
-        # tables - Patient, temp event table, t1oo for codelist
-        {"output_column_count": 3, "table_count": 2, "table_joins_count": 2},
+        # output columns include patient_id, and the 2 variables defined in the
+        # study defniiton
+        # tables - Patient, temp event table for codelist
+        {"output_column_count": 3, "table_count": 2, "table_joins_count": 1},
         {"variable_count": 2},
         {"variables_using_codelist_count": 1},
         {"variable_using_codelist": "event", "codelist_size": 1},
@@ -299,7 +298,7 @@ def test_stats_logging_generate_cohort(
     expected_initial_study_def_logs = [
         # these 3 are logged from StudyDefinition instantiation
         # patient_id, population, sex - all from patient table, but we make one temp # table per variable
-        {"output_column_count": 3, "table_count": 2, "table_joins_count": 2},
+        {"output_column_count": 3, "table_count": 2, "table_joins_count": 1},
         {"variable_count": 2},  # population, sex
         {"variables_using_codelist_count": 0},
         # index_date_count logged from generate_cohort
@@ -451,7 +450,7 @@ def test_stats_logging_generate_cohort_with_index_dates(
         {"index_date_count": 3},
         {"min_index_date": "2020-01-01", "max_index_date": "2020-03-01"},
         # output_column/table/joins_count is logged in tpp_backend on backend instantiation so it's repeated for each index date
-        *[{"output_column_count": 3, "table_count": 2, "table_joins_count": 2}] * 4,
+        *[{"output_column_count": 3, "table_count": 2, "table_joins_count": 1}] * 4,
         *[
             {"resetting_backend_index_date": ix_date}
             for ix_date in expected_index_dates
diff --git a/tests/test_tpp_backend.py b/tests/test_tpp_backend.py
index dc8baa3f..99dd5b51 100644
--- a/tests/test_tpp_backend.py
+++ b/tests/test_tpp_backend.py
@@ -152,12 +152,12 @@ def setup_function(function):
         (
             "mssql://user:pass@localhost:4321/db",
             "mssql://user:pass@localhost:4321/db",
-            False,
+            True,
         ),
         (
             "mssql://user:pass@localhost:4321/db?param1=one&param2&param1=three",
             "mssql://user:pass@localhost:4321/db?param1=one&param1=three&param2=",
-            False,
+            True,
         ),
         (
             "mssql://user:pass@localhost:4321/db?opensafely_include_t1oo&param2=two",
@@ -248,23 +248,6 @@ def test_minimal_study_with_reserved_keywords():
     assert_results(study.to_dicts(), all=["M", "F"], asc=["40", "55"])
 
 
-def test_minimal_study_with_t1oo_default():
-    # Test that type 1 opt-outs are excluded by default
-    session = make_session()
-    patient_1 = Patient(Patient_ID=1, DateOfBirth="1980-01-01", Sex="M")
-    patient_2 = Patient(Patient_ID=2, DateOfBirth="1965-01-01", Sex="F")
-    t1oo_1 = T1OO(Patient_ID=1)
-    session.add_all([patient_1, patient_2, t1oo_1])
-    session.commit()
-    study = StudyDefinition(
-        population=patients.all(),
-        all=patients.sex(),
-        asc=patients.age_as_of("2020-01-01"),
-    )
-    # patient_1 (M, age 40) excluded
-    assert_results(study.to_dicts(), all=["F"], asc=["55"])
-
-
 @pytest.mark.parametrize(
     "flag,expected",
     [

From dad5387447fc6b9763ff38bcf1271d719c3593af Mon Sep 17 00:00:00 2001
From: Becky Smith <becky.smith@thedatalab.org>
Date: Thu, 26 Oct 2023 11:54:34 +0100
Subject: [PATCH 7/7] Update t1oo table name

---
 cohortextractor/tpp_backend.py | 8 ++++++--
 tests/test_tpp_backend.py      | 8 ++++----
 tests/tpp_backend_setup.py     | 4 ++--
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/cohortextractor/tpp_backend.py b/cohortextractor/tpp_backend.py
index 62c61239..164e7dd7 100644
--- a/cohortextractor/tpp_backend.py
+++ b/cohortextractor/tpp_backend.py
@@ -37,6 +37,8 @@
 SLEEP = 4
 BACKOFF_FACTOR = 4
 
+T1OO_TABLE = "PatientsWithTypeOneDissent"
+
 
 class TPPBackend:
     _db_connection = None
@@ -421,8 +423,10 @@ def get_t1oo_exclude_expressions():
             # Otherwise we add an extra LEFT OUTER JOIN on the T1OO table and
             # WHERE clause which will exclude any patient IDs found in the T1OO table
             return (
-                [f"LEFT OUTER JOIN T1OO ON T1OO.Patient_ID = {patient_id_expr}"],
-                ["T1OO.Patient_ID IS null"],
+                [
+                    f"LEFT OUTER JOIN {T1OO_TABLE} ON {T1OO_TABLE}.Patient_ID = {patient_id_expr}"
+                ],
+                [f"{T1OO_TABLE}.Patient_ID IS null"],
             )
 
         t100_join, t1oo_where = get_t1oo_exclude_expressions()
diff --git a/tests/test_tpp_backend.py b/tests/test_tpp_backend.py
index 99dd5b51..16b7cfd0 100644
--- a/tests/test_tpp_backend.py
+++ b/tests/test_tpp_backend.py
@@ -30,7 +30,6 @@
     EC,
     ICNARC,
     OPA,
-    T1OO,
     UKRR,
     APCS_Der,
     Appointment,
@@ -55,6 +54,7 @@
     Organisation,
     Patient,
     PatientAddress,
+    PatientsWithTypeOneDissent,
     PotentialCareHomeAddress,
     RegistrationHistory,
     SGSS_AllTests_Negative,
@@ -141,7 +141,7 @@ def setup_function(function):
     session.query(UKRR).delete()
     session.query(Patient).delete()
     session.query(BuildProgress).delete()
-    session.query(T1OO).delete()
+    session.query(PatientsWithTypeOneDissent).delete()
 
     session.commit()
 
@@ -267,8 +267,8 @@ def test_minimal_study_with_t1oo_flag(set_database_url_with_t1oo, flag, expected
     patient_2 = Patient(Patient_ID=2, DateOfBirth="1965-01-01", Sex="F")
     patient_3 = Patient(Patient_ID=3, DateOfBirth="1975-01-01", Sex="F")
     patient_4 = Patient(Patient_ID=4, DateOfBirth="1985-01-01", Sex="F")
-    t1oo_2 = T1OO(Patient_ID=2)
-    t1oo_3 = T1OO(Patient_ID=3)
+    t1oo_2 = PatientsWithTypeOneDissent(Patient_ID=2)
+    t1oo_3 = PatientsWithTypeOneDissent(Patient_ID=3)
     session.add_all([patient_1, patient_2, patient_3, patient_4, t1oo_2, t1oo_3])
     session.commit()
     study = StudyDefinition(
diff --git a/tests/tpp_backend_setup.py b/tests/tpp_backend_setup.py
index ca342aac..b2e3f32e 100644
--- a/tests/tpp_backend_setup.py
+++ b/tests/tpp_backend_setup.py
@@ -1057,8 +1057,8 @@ class VmpMapping(Base):
     prev_id = Column(String(collation="Latin1_General_CI_AS"))
 
 
-class T1OO(Base):
-    __tablename__ = "T1OO"
+class PatientsWithTypeOneDissent(Base):
+    __tablename__ = "PatientsWithTypeOneDissent"
     # fake pk to satisfy the ORM
     # Patient_ID might be the primary key, TBC
     pk = Column(Integer, primary_key=True)