From 664ebef4292984aaad5725e45d03050432950d66 Mon Sep 17 00:00:00 2001 From: Matt Garber Date: Tue, 6 Feb 2024 13:58:21 -0500 Subject: [PATCH 1/7] Ruff --- .github/pull_request_template.md | 1 - .github/workflows/ci.yaml | 4 +- .pre-commit-config.yaml | 15 ++-- cumulus_library/base_table_builder.py | 13 ++-- cumulus_library/base_utils.py | 14 ++-- cumulus_library/cli.py | 25 +++--- cumulus_library/cli_parser.py | 18 +++-- cumulus_library/databases.py | 41 ++++++---- cumulus_library/parsers/fhir_valueset.py | 16 ++-- cumulus_library/schema/columns.py | 17 ++-- cumulus_library/schema/valueset.py | 3 +- cumulus_library/statistics/counts.py | 22 +++--- cumulus_library/statistics/psm.py | 41 +++++----- .../studies/core/builder_condition.py | 4 +- .../studies/core/builder_documentreference.py | 7 +- .../studies/core/builder_encounter.py | 4 +- .../studies/core/builder_medication.py | 2 +- .../studies/core/builder_medicationrequest.py | 5 +- .../studies/core/builder_observation.py | 5 +- .../studies/core/builder_patient.py | 5 +- .../core/core_templates/core_templates.py | 5 +- cumulus_library/studies/core/count_core.py | 8 +- .../studies/discovery/code_detection.py | 23 +++--- .../studies/vocab/vocab_icd_builder.py | 3 +- cumulus_library/study_parser.py | 77 +++++++++---------- .../template_sql/base_templates.py | 31 ++++---- cumulus_library/template_sql/sql_utils.py | 14 ++-- .../statistics/counts_templates.py | 9 +-- .../template_sql/statistics/psm_templates.py | 21 ++--- cumulus_library/upload.py | 2 - pyproject.toml | 24 +++++- tests/conftest.py | 18 ++--- tests/regression/run_regression.py | 7 +- tests/test_cli.py | 48 +++++++----- tests/test_conftest.py | 17 ++-- tests/test_core.py | 8 +- tests/test_duckdb.py | 3 +- tests/test_parsers.py | 4 +- tests/test_psm.py | 1 - tests/test_psm_templates.py | 2 +- tests/test_study_parser.py | 7 +- tests/test_template_sql_utils.py | 4 +- tests/test_templates.py | 1 - 43 files changed, 306 insertions(+), 293 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 73705402..c8a451b1 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -2,5 +2,4 @@ ### Checklist - [ ] Consider if documentation (like in `docs/`) needs to be updated - [ ] Consider if tests should be added -- [ ] Run pylint if you're making changes beyond adding studies - [ ] Update template repo if there are changes to study configuration \ No newline at end of file diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 24d970c4..2c017949 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -47,10 +47,10 @@ jobs: - name: Run sqlfluff on jinja templates run: | sqlfluff lint - - name: Run black + - name: Run ruff if: success() || failure() # still run black if above checks fails run: | - black --check --verbose . 
+ ruff regression: runs-on: ubuntu-22.04 permissions: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 71f0d9cc..84a4db32 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,15 +1,10 @@ repos: - - repo: https://github.com/psf/black - #this version is synced with the black mentioned in .github/workflows/ci.yml - rev: 23.10.0 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.2.1 hooks: - - id: black - entry: bash -c 'black "$@"; git add -u' -- - # It is recommended to specify the latest version of Python - # supported by your project here, or alternatively use - # pre-commit's default_language_version, see - # https://pre-commit.com/#top_level-default_language_version - language_version: python3.9 + - id: ruff + args: [--fix] + - id: ruff-format - repo: https://github.com/sqlfluff/sqlfluff rev: 2.3.4 diff --git a/cumulus_library/base_table_builder.py b/cumulus_library/base_table_builder.py index 66646500..0786bf4a 100644 --- a/cumulus_library/base_table_builder.py +++ b/cumulus_library/base_table_builder.py @@ -3,12 +3,11 @@ import pathlib import re import sys - from abc import ABC, abstractmethod from typing import final -from cumulus_library.databases import DatabaseCursor from cumulus_library import base_utils +from cumulus_library.databases import DatabaseCursor class BaseTableBuilder(ABC): @@ -70,8 +69,8 @@ def execute_queries( ) table_name = table_name[0] - # if it contains a schema, remove it (usually it won't, but some CTAS - # forms may) + # if it contains a schema, remove it (usually it won't, but some + # CTAS forms may) if "." in table_name: table_name = table_name.split(".")[1].replace('"', "") table_names.append(table_name) @@ -94,7 +93,7 @@ def execute_queries( self.post_execution(cursor, schema, verbose, drop_table, *args, **kwargs) - def post_execution( + def post_execution( # noqa: B027 - this looks like, but is not, an abstract method self, cursor: DatabaseCursor, schema: str, @@ -122,7 +121,9 @@ def comment_queries(self, doc_str=None): commented_queries.pop() self.queries = commented_queries - def write_queries(self, path: pathlib.Path = pathlib.Path.cwd() / "output.sql"): + def write_queries(self, path: pathlib.Path | None = None): """writes all queries constructed by prepare_queries to disk""" + if path is None: + path = pathlib.Path.cwd() / "output.sql" path.parents[0].mkdir(parents=True, exist_ok=True) with open(path, "w", encoding="utf-8") as file: diff --git a/cumulus_library/base_utils.py b/cumulus_library/base_utils.py index 84fc044e..213b9880 100644 --- a/cumulus_library/base_utils.py +++ b/cumulus_library/base_utils.py @@ -1,11 +1,9 @@ """ Collection of small commonly used utility functions """ import datetime -import os import json - +import os from contextlib import contextmanager -from typing import List from rich import progress @@ -15,16 +13,16 @@ def filepath(filename: str) -> str: def load_text(path: str) -> str: - with open(path, "r", encoding="UTF-8") as fp: + with open(path, encoding="UTF-8") as fp: return fp.read() def load_json(path: str) -> dict: - with open(path, "r", encoding="UTF-8") as fp: + with open(path, encoding="UTF-8") as fp: return json.load(fp) -def parse_sql(sql_text: str) -> List[str]: +def parse_sql(sql_text: str) -> list[str]: commands = [] for statement in sql_text.split(";"): @@ -36,11 +34,11 @@ def parse_sql(sql_text: str) -> list[str]: return filter_strip(commands) -def filter_strip(commands) -> List[str]: +def filter_strip(commands) -> list[str]: return list(filter(None,
[c.strip() for c in commands])) -def list_coding(code_display: dict, system=None) -> List[dict]: +def list_coding(code_display: dict, system=None) -> list[dict]: as_list = [] for code, display in code_display.items(): if system: diff --git a/cumulus_library/cli.py b/cumulus_library/cli.py index c5465649..6c3ec07f 100755 --- a/cumulus_library/cli.py +++ b/cumulus_library/cli.py @@ -7,18 +7,15 @@ import sys import sysconfig - -from typing import Dict, List, Optional - import rich from cumulus_library import ( __version__, + base_utils, cli_parser, databases, enums, errors, - base_utils, protected_table_builder, study_parser, upload, @@ -60,8 +57,8 @@ def update_transactions(self, prefix: str, status: str): def clean_study( self, - targets: List[str], - study_dict: Dict, + targets: list[str], + study_dict: dict, *, stats_clean: bool, prefix: bool = False, @@ -106,7 +103,7 @@ def clean_and_build_study( target: pathlib.Path, *, stats_build: bool, - continue_from: str = None, + continue_from: str | None = None, ) -> None: """Recreates study views/tables @@ -176,7 +173,7 @@ def run_single_table_builder( parser=self.db.parser(), ) - def clean_and_build_all(self, study_dict: Dict, stats_build: bool) -> None: + def clean_and_build_all(self, study_dict: dict, stats_build: bool) -> None: """Builds views for all studies. NOTE: By design, this method will always exclude the `template` study dir, @@ -206,7 +203,7 @@ def export_study(self, target: pathlib.Path, data_path: pathlib.Path) -> None: studyparser = study_parser.StudyManifestParser(target, data_path) studyparser.export_study(self.db, data_path) - def export_all(self, study_dict: Dict, data_path: pathlib.Path): + def export_all(self, study_dict: dict, data_path: pathlib.Path): """Exports all defined count tables to disk""" for key in study_dict.keys(): self.export_study(study_dict[key], data_path) @@ -254,7 +251,7 @@ def create_template(path: str) -> None: dest_path.write_bytes(source_path.read_bytes()) -def get_study_dict(alt_dir_paths: List) -> Optional[Dict[str, pathlib.Path]]: +def get_study_dict(alt_dir_paths: list) -> dict[str, pathlib.Path] | None: """Gets valid study targets from ./studies/, and any pip installed studies :returns: A list of Path objects @@ -264,11 +261,11 @@ def get_study_dict(alt_dir_paths: list) -> dict[str, pathlib.Path] | None: # first, we'll get any installed public studies with open( - pathlib.Path(cli_path, "./module_allowlist.json"), "r", encoding="utf-8" + pathlib.Path(cli_path, "./module_allowlist.json"), encoding="utf-8" ) as study_allowlist_json: study_allowlist = json.load(study_allowlist_json)["allowlist"] site_packages_dir = sysconfig.get_path("purelib") for study, subdir in study_allowlist.items(): study_path = pathlib.Path(site_packages_dir, subdir) if study_path.exists(): manifest_studies[study] = study_path @@ -295,7 +293,7 @@ def get_studies_by_manifest_path(path: pathlib.Path) -> dict: return manifest_paths -def run_cli(args: Dict): +def run_cli(args: dict): """Controls which library tasks are run based on CLI arguments""" if args["action"] == "create": create_template(args["create_dir"]) @@ -312,7 +310,6 @@ def run_cli(args: Dict): runner.verbose = True print("Testing connection to database...") runner.cursor.execute("SHOW DATABASES") - study_dict = get_study_dict(args["study_dir"]) if "prefix" not in args.keys(): if args["target"]: diff --git a/cumulus_library/cli_parser.py b/cumulus_library/cli_parser.py
index 5604936b..7ecdddef 100644 --- a/cumulus_library/cli_parser.py +++ b/cumulus_library/cli_parser.py @@ -29,10 +29,10 @@ def add_study_dir_argument(parser: argparse.ArgumentParser) -> None: action="append", help=( "Optionally add one or more directories to look for study definitions in. " - "Default is in project directory and CUMULUS_LIBRARY_STUDY_DIR, if present, " - "followed by any supplied paths. Target, and all its subdirectories, " - "are checked for manifests. Overriding studies with the same namespace " - "supersede earlier ones." + "Default is in project directory and CUMULUS_LIBRARY_STUDY_DIR, " + "if present, followed by any supplied paths. Target, and all its " + "subdirectories, are checked for manifests. Overriding studies with the" + " same namespace supersede earlier ones." ), ) @@ -87,10 +87,14 @@ def add_db_config(parser: argparse.ArgumentParser) -> None: ) group.add_argument( "--database", - # In Athena, we use this as the schema_name (which is also called a Database in their UX). + # In Athena, we use this as the schema_name (which is also called a Database + # in their UX). + # # In DuckDB, we use this as the path to the filename to store tables. - # Since we started as an Athena-centric codebase, we mostly keep referring to this as - # name "schema_name". But to the user, both uses are still conceptually a "database". + # + # Since we started as an Athena-centric codebase, we mostly keep referring to + # this as name "schema_name". But to the user, both uses are still conceptually + # a "database". dest="schema_name", help="Database name (for Athena) or file (for DuckDB)", ) diff --git a/cumulus_library/databases.py b/cumulus_library/databases.py index 858a7156..17bc6ca5 100644 --- a/cumulus_library/databases.py +++ b/cumulus_library/databases.py @@ -14,7 +14,7 @@ import os import sys from pathlib import Path -from typing import Optional, Protocol, Union +from typing import Protocol import cumulus_fhir_support import duckdb @@ -31,13 +31,13 @@ class DatabaseCursor(Protocol): def execute(self, sql: str) -> None: pass - def fetchone(self) -> Optional[list]: + def fetchone(self) -> list | None: pass - def fetchmany(self, size: Optional[int]) -> Optional[list[list]]: + def fetchmany(self, size: int | None) -> list[list] | None: pass - def fetchall(self) -> Optional[list[list]]: + def fetchall(self) -> list[list] | None: pass @@ -131,6 +131,7 @@ def execute_as_pandas(self, sql: str) -> pandas.DataFrame: def parser(self) -> DatabaseParser: """Returns parser object for interrogating DB schemas""" + @abc.abstractmethod def close(self) -> None: """Clean up any resources necessary""" @@ -174,6 +175,9 @@ def execute_as_pandas(self, sql: str) -> pandas.DataFrame: def parser(self) -> DatabaseParser: return AthenaParser() + def close(self) -> None: + return self.connection.close() + class AthenaParser(DatabaseParser): def validate_table_schema( @@ -190,7 +194,7 @@ def __init__(self, db_file: str): super().__init__("main") self.connection = duckdb.connect(db_file) # Aliasing Athena's as_pandas to duckDB's df cast - setattr(duckdb.DuckDBPyConnection, "as_pandas", duckdb.DuckDBPyConnection.df) + duckdb.DuckDBPyConnection.as_pandas = duckdb.DuckDBPyConnection.df # Paper over some syntax differences between Athena and DuckDB self.connection.create_function( @@ -238,22 +242,24 @@ def __init__(self, db_file: str): ) def insert_tables(self, tables: dict[str, pyarrow.Table]) -> None: - """Ingests all ndjson data from a folder tree (often the output folder of Cumulus ETL)""" + """Ingests 
all ndjson data from a folder tree. + + This is often the output folder of Cumulus ETL""" for name, table in tables.items(): self.connection.register(name, table) @staticmethod def _compat_array_join( - value: Optional[list[Optional[str]]], delimiter: str - ) -> Optional[str]: + value: list[str | None] | None, delimiter: str + ) -> str | None: if value is None: return None return delimiter.join(v for v in value if v is not None) @staticmethod def _compat_date( - value: Union[str, datetime.datetime, datetime.date, None] - ) -> Optional[datetime.date]: + value: str | datetime.datetime | datetime.date | None, + ) -> datetime.date | None: if value is None: return None elif isinstance(value, str): @@ -266,14 +272,14 @@ def _compat_date( raise ValueError("Unexpected date() argument:", type(value), value) @staticmethod - def _compat_to_utf8(value: Optional[str]) -> Optional[datetime.date]: + def _compat_to_utf8(value: str | None) -> datetime.date | None: """See the create_function() call for to_utf8 for more background""" return value @staticmethod def _compat_from_iso8601_timestamp( - value: Optional[str], - ) -> Optional[datetime.datetime]: + value: str | None, + ) -> datetime.datetime | None: if value is None: return None @@ -329,12 +335,13 @@ def read_ndjson_dir(path: str) -> dict[str, pyarrow.Table]: """Loads a directory tree of raw ndjson into schema-ful tables. :param path: a directory path - :returns: dictionary of table names (like 'documentreference') to table data (with schema) + :returns: dictionary of table names (like 'documentreference') to table + data (with schema) """ all_tables = {} - # Manually specify the list of resources because we want to create each table even if the - # folder does not exist. + # Manually specify the list of resources because we want to create each table + # even if the folder does not exist. resources = [ "AllergyIntolerance", "Condition", @@ -364,7 +371,7 @@ def read_ndjson_dir(path: str) -> dict[str, pyarrow.Table]: # Read all ndjson directly into memory rows = [] for filename in filenames: - with open(filename, "r", encoding="utf8") as f: + with open(filename, encoding="utf8") as f: for line in f: rows.append(json.loads(line)) diff --git a/cumulus_library/parsers/fhir_valueset.py b/cumulus_library/parsers/fhir_valueset.py index a11eaccf..a16632cb 100644 --- a/cumulus_library/parsers/fhir_valueset.py +++ b/cumulus_library/parsers/fhir_valueset.py @@ -1,17 +1,15 @@ -import os -from typing import List - from fhirclient.models.coding import Coding from cumulus_library import base_utils from cumulus_library.template_sql import base_templates -def get_include_coding(valueset_json) -> List[Coding]: - """ - Obtain a list of Coding "concepts" from a ValueSet. +def get_include_coding(valueset_json) -> list[Coding]: + """Obtain a list of Coding "concepts" from a ValueSet. + This method currently supports only "include" of "concept" defined fields. - Not supported: recursive fetching of contained ValueSets, which requires UMLS API Key and Wget, etc. + Not supported: recursive fetching of contained ValueSets, which requires UMLS + API Key and Wget, etc. 
examples https://vsac.nlm.nih.gov/valueset/2.16.840.1.113762.1.4.1146.1629/expansion/Latest @@ -31,7 +29,7 @@ def get_include_coding(valueset_json) -> List[Coding]: return parsed -def create_view_sql(view_name: str, concept_list: List[Coding]) -> str: +def create_view_sql(view_name: str, concept_list: list[Coding]) -> str: """ :param view_name: like study__define_type :param concept_list: list of concepts to include in definition @@ -57,7 +55,7 @@ def create_view_sql(view_name: str, concept_list: List[Coding]) -> str: """ -def write_view_sql(view_name: str, concept_list: List[Coding]) -> None: +def write_view_sql(view_name: str, concept_list: list[Coding]) -> None: """Convenience wrapper for writing create_view_sql to disk""" with open(f"{view_name}.sql", "w") as fp: fp.write(create_view_sql(view_name, concept_list)) diff --git a/cumulus_library/schema/columns.py b/cumulus_library/schema/columns.py index 3810b6ab..38e6fece 100644 --- a/cumulus_library/schema/columns.py +++ b/cumulus_library/schema/columns.py @@ -1,10 +1,15 @@ # pylint: disable=W,C,R -from enum import Enum, EnumMeta -from cumulus_library.schema.typesystem import Datatypes, Coding, Vocab -from cumulus_library.schema.valueset import Gender, Race, Ethnicity -from cumulus_library.schema.valueset import DurationUnits -from cumulus_library.schema.valueset import EncounterCode -from cumulus_library.schema.valueset import ObservationInterpretationDetection +from enum import Enum + +from cumulus_library.schema.typesystem import Datatypes, Vocab +from cumulus_library.schema.valueset import ( + DurationUnits, + EncounterCode, + Ethnicity, + Gender, + ObservationInterpretationDetection, + Race, +) class ColumnEnum(Enum): diff --git a/cumulus_library/schema/valueset.py b/cumulus_library/schema/valueset.py index 0c157426..4c5ed990 100644 --- a/cumulus_library/schema/valueset.py +++ b/cumulus_library/schema/valueset.py @@ -1,6 +1,7 @@ # pylint: disable=W,C,R from enum import Enum -from cumulus_library.schema.typesystem import Coding, Vocab + +from cumulus_library.schema.typesystem import Coding ################################################################################ # FHIR ValueSets diff --git a/cumulus_library/statistics/counts.py b/cumulus_library/statistics/counts.py index 7b3adb04..0d0707f9 100644 --- a/cumulus_library/statistics/counts.py +++ b/cumulus_library/statistics/counts.py @@ -1,20 +1,18 @@ """Class for generating counts tables from templates""" import sys - from pathlib import Path -from typing import Union from cumulus_library.base_table_builder import BaseTableBuilder +from cumulus_library.errors import CountsBuilderError from cumulus_library.study_parser import StudyManifestParser from cumulus_library.template_sql.statistics import counts_templates -from cumulus_library.errors import CountsBuilderError class CountsBuilder(BaseTableBuilder): """Extends BaseTableBuilder for counts-related use cases""" - def __init__(self, study_prefix: str = None): + def __init__(self, study_prefix: str | None = None): if study_prefix is None: # This slightly wonky approach will give us the path of the # directory of a class extending the CountsBuilder @@ -44,7 +42,7 @@ def get_table_name(self, table_name: str, duration=None) -> str: return f"{self.study_prefix}__{table_name}" def get_where_clauses( - self, clause: Union[list, str, None] = None, min_subject: int = 10 + self, clause: list | str | None = None, min_subject: int = 10 ) -> str: """Convenience method for constructing arbitrary where clauses. 
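Reviewer note on the hunk above: most of this patch is ruff's pyupgrade ("UP") autofixes, which rewrite typing.Union/Optional/List annotations into the PEP 604/585 builtin forms once target-version = "py310" is set in pyproject.toml. A minimal before/after sketch of the rewrite — the function names here are hypothetical, not code from this repo:

    from typing import List, Optional, Union

    # Pre-ruff spelling, as on the '-' lines in these hunks:
    def where_clause_old(clause: Union[List[str], str, None] = None) -> Optional[str]:
        """Old-style annotations via the typing module."""

    # After `ruff check --fix` with UP rules enabled, on Python 3.10+:
    def where_clause_new(clause: list[str] | str | None = None) -> str | None:
        """Same types, spelled with builtin generics and PEP 604 unions."""

The two spellings are equivalent at runtime and to type checkers, which is why these hunks carry no behavior risk.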
@@ -101,7 +99,7 @@ def count_condition( table_name: str, source_table: str, table_cols: list, - where_clauses: Union[list, None] = None, + where_clauses: list | None = None, min_subject: int = 10, ) -> str: """wrapper method for constructing condition counts tables @@ -128,7 +126,7 @@ def count_documentreference( table_name: str, source_table: str, table_cols: list, - where_clauses: Union[list, None] = None, + where_clauses: list | None = None, min_subject: int = 10, ) -> str: """wrapper method for constructing documentreference counts tables @@ -155,7 +153,7 @@ def count_encounter( table_name: str, source_table: str, table_cols: list, - where_clauses: Union[list, None] = None, + where_clauses: list | None = None, min_subject: int = 10, ) -> str: """wrapper method for constructing encounter counts tables @@ -181,7 +179,7 @@ def count_medicationrequest( table_name: str, source_table: str, table_cols: list, - where_clauses: Union[list, None] = None, + where_clauses: list | None = None, min_subject: int = 10, ) -> str: """wrapper method for constructing medicationrequests counts tables @@ -207,7 +205,7 @@ def count_observation( table_name: str, source_table: str, table_cols: list, - where_clauses: Union[list, None] = None, + where_clauses: list | None = None, min_subject: int = 10, ) -> str: """wrapper method for constructing observation counts tables @@ -233,7 +231,7 @@ def count_patient( table_name: str, source_table: str, table_cols: list, - where_clauses: Union[list, None] = None, + where_clauses: list | None = None, min_subject: int = 10, ) -> str: """wrapper method for constructing patient counts tables @@ -266,7 +264,7 @@ def write_counts(self, filepath: str): self.comment_queries() self.write_queries(filename=filepath) - def prepare_queries(self, cursor: object = None, schema: str = None): + def prepare_queries(self, cursor: object | None = None, schema: str | None = None): """Stub implementing abstract base class This should be overridden in any count generator. See studies/core/count_core.py diff --git a/cumulus_library/statistics/psm.py b/cumulus_library/statistics/psm.py index 00a2f059..0e05b223 100644 --- a/cumulus_library/statistics/psm.py +++ b/cumulus_library/statistics/psm.py @@ -5,21 +5,18 @@ import pathlib import sys import warnings - from dataclasses import dataclass +import matplotlib.pyplot as plt import pandas +import seaborn as sns import toml - from psmpy import PsmPy # these imports are mimicing PsmPy imports for re-implemented functions from psmpy.functions import cohenD -import matplotlib.pyplot as plt -import seaborn as sns -from cumulus_library import databases -from cumulus_library import base_table_builder +from cumulus_library import base_table_builder, databases from cumulus_library.template_sql import base_templates from cumulus_library.template_sql.statistics import psm_templates @@ -86,7 +83,7 @@ def __init__(self, toml_config_path: str, data_path: pathlib.Path): ) except KeyError: sys.exit( - f"PSM configuration at {toml_config_path} contains missing/invalid keys." + f"PSM configuration {toml_config_path} contains missing/invalid keys." 
"Check the PSM documentation for an example config with more details:\n" "https://docs.smarthealthit.org/cumulus/library/statistics/propensity-score-matching.html" ) @@ -190,8 +187,8 @@ def psm_plot_match( Title="Side by side matched controls", Ylabel="Number of patients", Xlabel="propensity logit", - names=["positive_cohort", "negative_cohort"], - colors=["#E69F00", "#56B4E9"], + names=["positive_cohort", "negative_cohort"], # noqa: B006 + colors=["#E69F00", "#56B4E9"], # noqa: B006 save=True, filename="propensity_match.png", ): @@ -214,13 +211,13 @@ def psm_plot_match( plt.xlabel(Xlabel) plt.ylabel(Ylabel) plt.title(Title) - if save == True: + if save: plt.savefig(filename, dpi=250) def psm_effect_size_plot( self, psm, - title="Standardized Mean differences accross covariates before and after matching", + title="Standardized Mean differences accross covariates before and after matching", # noqa: E501 before_color="#FCB754", after_color="#3EC8FB", save=False, @@ -233,8 +230,8 @@ def psm_effect_size_plot( and passing in the psm object instead of assuming a call from inside the PsmPy class. """ - df_preds_after = psm.df_matched[[psm.treatment] + psm.xvars] - df_preds_b4 = psm.data[[psm.treatment] + psm.xvars] + df_preds_after = psm.df_matched[[psm.treatment] + psm.xvars] # noqa: RUF005 + df_preds_b4 = psm.data[[psm.treatment] + psm.xvars] # noqa: RUF005 df_preds_after_float = df_preds_after.astype(float) df_preds_b4_float = df_preds_b4.astype(float) @@ -255,7 +252,7 @@ def psm_effect_size_plot( orient="h", ) sns_plot.set(title=title) - if save == True: + if save: sns_plot.figure.savefig(filename, dpi=250, bbox_inches="tight") def generate_psm_analysis( @@ -271,7 +268,9 @@ def generate_psm_analysis( ).as_pandas() symptoms_dict = self._get_symptoms_dict(self.config.classification_json) for dependent_variable, codes in symptoms_dict.items(): - df[dependent_variable] = df["code"].apply(lambda x: 1 if x in codes else 0) + df[dependent_variable] = df["code"].apply( + lambda x, codes=codes: 1 if x in codes else 0 + ) df = df.drop(columns="code") # instance_count present but unused for PSM if table contains a count_ref input # (it's intended for manual review) @@ -315,8 +314,8 @@ def generate_psm_analysis( # mentioning workarounds for this behavior. with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) - # This function populates the psm.predicted_data element, which is required - # for things like the knn_matched() function call + # This function populates the psm.predicted_data element, which is + # required for things like the knn_matched() function call psm.logistic_ps(balance=True) # This function populates the psm.df_matched element psm.knn_matched( @@ -338,11 +337,13 @@ def generate_psm_analysis( ) except ZeroDivisionError: sys.exit( - "Encountered a divide by zero error during statistical graph generation. Try increasing your sample size." + "Encountered a divide by zero error during statistical graph " + "generation. Try increasing your sample size." ) except ValueError: sys.exit( - "Encountered a value error during KNN matching. Try increasing your sample size." + "Encountered a value error during KNN matching. Try increasing " + "your sample size." 
) def prepare_queries(self, cursor: object, schema: str, table_suffix: str): @@ -354,7 +355,7 @@ def post_execution( schema: str, verbose: bool, drop_table: bool = False, - table_suffix: str = None, + table_suffix: str | None = None, ): # super().execute_queries(cursor, schema, verbose, drop_table) self.generate_psm_analysis(cursor, schema, table_suffix) diff --git a/cumulus_library/studies/core/builder_condition.py b/cumulus_library/studies/core/builder_condition.py index afdf28c5..1492f509 100644 --- a/cumulus_library/studies/core/builder_condition.py +++ b/cumulus_library/studies/core/builder_condition.py @@ -1,9 +1,7 @@ -from cumulus_library import base_table_builder -from cumulus_library import databases +from cumulus_library import base_table_builder, databases from cumulus_library.studies.core.core_templates import core_templates from cumulus_library.template_sql import base_templates, sql_utils - expected_table_cols = { "condition": { "category": [ diff --git a/cumulus_library/studies/core/builder_documentreference.py b/cumulus_library/studies/core/builder_documentreference.py index 22d047d6..82b33937 100644 --- a/cumulus_library/studies/core/builder_documentreference.py +++ b/cumulus_library/studies/core/builder_documentreference.py @@ -1,16 +1,13 @@ -from cumulus_library import base_table_builder -from cumulus_library import databases +from cumulus_library import base_table_builder, databases from cumulus_library.studies.core.core_templates import core_templates from cumulus_library.template_sql import sql_utils - expected_table_cols = { "documentreference": { "id": [], "type": [], "status": [], "docstatus": [], - "context": [], "subject": ["reference"], "context": ["period", "start"], } @@ -37,7 +34,7 @@ def prepare_queries( source_id="id", column_name="type", is_array=False, - target_table=f"core__documentreference_dn_type", + target_table="core__documentreference_dn_type", ) ], ) diff --git a/cumulus_library/studies/core/builder_encounter.py b/cumulus_library/studies/core/builder_encounter.py index 6f5595c7..cd3552cd 100644 --- a/cumulus_library/studies/core/builder_encounter.py +++ b/cumulus_library/studies/core/builder_encounter.py @@ -1,9 +1,7 @@ -from cumulus_library import base_table_builder -from cumulus_library import databases +from cumulus_library import base_table_builder, databases from cumulus_library.studies.core.core_templates import core_templates from cumulus_library.template_sql import sql_utils - expected_table_cols = { "encounter": { "status": [], diff --git a/cumulus_library/studies/core/builder_medication.py b/cumulus_library/studies/core/builder_medication.py index f61a5643..c85690b4 100644 --- a/cumulus_library/studies/core/builder_medication.py +++ b/cumulus_library/studies/core/builder_medication.py @@ -1,8 +1,8 @@ """ Module for generating core medication table""" from cumulus_library import base_table_builder, base_utils -from cumulus_library.template_sql import base_templates, sql_utils from cumulus_library.studies.core.core_templates import core_templates +from cumulus_library.template_sql import base_templates, sql_utils class MedicationBuilder(base_table_builder.BaseTableBuilder): diff --git a/cumulus_library/studies/core/builder_medicationrequest.py b/cumulus_library/studies/core/builder_medicationrequest.py index 3030ea7e..7203a924 100644 --- a/cumulus_library/studies/core/builder_medicationrequest.py +++ b/cumulus_library/studies/core/builder_medicationrequest.py @@ -3,10 +3,9 @@ Note: This module assumes that you have already run 
builder_medication, as it leverages the core__medication table for data population""" -from cumulus_library import base_table_builder -from cumulus_library.template_sql import base_templates, sql_utils -from cumulus_library import databases +from cumulus_library import base_table_builder, databases from cumulus_library.studies.core.core_templates import core_templates +from cumulus_library.template_sql import base_templates, sql_utils expected_table_cols = { "medicationrequest": { diff --git a/cumulus_library/studies/core/builder_observation.py b/cumulus_library/studies/core/builder_observation.py index 294ba3c4..48db3032 100644 --- a/cumulus_library/studies/core/builder_observation.py +++ b/cumulus_library/studies/core/builder_observation.py @@ -2,10 +2,9 @@ from dataclasses import dataclass -from cumulus_library import base_table_builder -from cumulus_library.template_sql import sql_utils -from cumulus_library import databases +from cumulus_library import base_table_builder, databases from cumulus_library.studies.core.core_templates import core_templates +from cumulus_library.template_sql import sql_utils expected_table_cols = { "observation": { diff --git a/cumulus_library/studies/core/builder_patient.py b/cumulus_library/studies/core/builder_patient.py index 8302163e..f6c62e20 100644 --- a/cumulus_library/studies/core/builder_patient.py +++ b/cumulus_library/studies/core/builder_patient.py @@ -1,16 +1,15 @@ """ Module for extracting US core extensions from patient records""" -from cumulus_library.base_table_builder import BaseTableBuilder -from cumulus_library.template_sql import base_templates, sql_utils from cumulus_library import databases +from cumulus_library.base_table_builder import BaseTableBuilder from cumulus_library.studies.core.core_templates import core_templates +from cumulus_library.template_sql import base_templates, sql_utils expected_table_cols = { "patient": { "id": [], "gender": [], "address": [], - "id": [], "birthdate": [], } } diff --git a/cumulus_library/studies/core/core_templates/core_templates.py b/cumulus_library/studies/core/core_templates/core_templates.py index ce4808c3..0c0e7562 100644 --- a/cumulus_library/studies/core/core_templates/core_templates.py +++ b/cumulus_library/studies/core/core_templates/core_templates.py @@ -1,5 +1,4 @@ import pathlib -import typing import jinja2 @@ -9,7 +8,9 @@ def get_core_template( - target_table: str, schema: dict[dict[bool]] = None, config: dict = None + target_table: str, + schema: dict[dict[bool]] | None = None, + config: dict | None = None, ) -> str: """Extracts code system details as a standalone table""" with open(f"{PATH}/{target_table}.sql.jinja") as file: diff --git a/cumulus_library/studies/core/count_core.py b/cumulus_library/studies/core/count_core.py index fd2e214d..fa8a6775 100644 --- a/cumulus_library/studies/core/count_core.py +++ b/cumulus_library/studies/core/count_core.py @@ -1,5 +1,5 @@ -from typing import List from pathlib import Path + from cumulus_library.statistics import counts @@ -25,7 +25,7 @@ def count_core_documentreference(self, duration: str = "month"): ] return self.count_documentreference(table_name, from_table, cols) - def count_core_encounter(self, duration: str = None): + def count_core_encounter(self, duration: str | None = None): table_name = self.get_table_name("count_encounter", duration=duration) from_table = self.get_table_name("encounter") @@ -41,7 +41,7 @@ def count_core_encounter(self, duration: str = None): return self.count_encounter(table_name, from_table, cols) def 
_count_core_encounter_type( - self, table_name: str, cols: list, duration: str = None + self, table_name: str, cols: list, duration: str | None = None ): """ Encounter Type information is for every visit, and therefore this @@ -61,7 +61,7 @@ def _count_core_encounter_type( return self.count_encounter(table_name, from_table, cols) # TODO: consider renaming function and table to `count_core_encounter_all_types` - def count_core_encounter_type(self, duration: str = None): + def count_core_encounter_type(self, duration: str | None = None): cols = [ "enc_class_display", "enc_type_display", diff --git a/cumulus_library/studies/discovery/code_detection.py b/cumulus_library/studies/discovery/code_detection.py index e8fda411..5e8c449b 100644 --- a/cumulus_library/studies/discovery/code_detection.py +++ b/cumulus_library/studies/discovery/code_detection.py @@ -1,39 +1,40 @@ """ Module for generating encounter codeableConcept table""" -from cumulus_library import base_table_builder, helper -from cumulus_library.template_sql import templates, utils - +from cumulus_library import base_table_builder, base_utils from cumulus_library.studies.discovery import code_definitions +from cumulus_library.template_sql import base_templates, sql_utils -class CodeDetectionBuilder(BaseTableBuilder): +class CodeDetectionBuilder(base_table_builder.BaseTableBuilder): display_text = "Selecting unique code systems..." def _check_codes_in_fields(self, code_sources: list[dict], schema, cursor) -> dict: """checks if Coding/CodeableConcept fields are present and populated""" - with helper.get_progress_bar() as progress: + with base_utils.get_progress_bar() as progress: task = progress.add_task( "Discovering available coding systems...", total=len(code_sources), ) for code_source in code_sources: if code_source["is_array"]: - code_source["has_data"] = utils.is_codeable_concept_array_populated( + code_source[ + "has_data" + ] = sql_utils.is_codeable_concept_array_populated( schema, code_source["table_name"], code_source["column_name"], cursor, ) elif code_source["is_bare_coding"]: - code_source["has_data"] = utils.is_code_populated( + code_source["has_data"] = sql_utils.is_code_populated( schema, code_source["table_name"], code_source["column_name"], cursor, ) else: - code_source["has_data"] = utils.is_codeable_concept_populated( + code_source["has_data"] = sql_utils.is_codeable_concept_populated( schema, code_source["table_name"], code_source["column_name"], @@ -57,7 +58,7 @@ def prepare_queries(self, cursor: object, schema: str, *args, **kwargs): ): raise KeyError( "Expected table_name and column_name keys in " - f"{str(code_definition)}" + f"{code_definition!s}" ) code_source = { "is_bare_coding": False, @@ -69,5 +70,7 @@ def prepare_queries(self, cursor: object, schema: str, *args, **kwargs): code_sources.append(code_source) code_sources = self._check_codes_in_fields(code_sources, schema, cursor) - query = templates.get_code_system_pairs("discovery__code_sources", code_sources) + query = base_templates.get_code_system_pairs( + "discovery__code_sources", code_sources + ) self.queries.append(query) diff --git a/cumulus_library/studies/vocab/vocab_icd_builder.py b/cumulus_library/studies/vocab/vocab_icd_builder.py index cb9f849f..4a323893 100644 --- a/cumulus_library/studies/vocab/vocab_icd_builder.py +++ b/cumulus_library/studies/vocab/vocab_icd_builder.py @@ -34,12 +34,11 @@ def prepare_queries(self, cursor: object, schema: str, *args, **kwargs): path = pathlib.Path(__file__).parent headers = ["CUI", "TTY", "CODE", "SAB", 
"STR"] - header_types = [f"{x} string" for x in headers] rows_processed = 0 dataset = [] created = False for filename in icd_files: - with open(f"{path}/{filename}.bsv", "r") as file: + with open(f"{path}/{filename}.bsv") as file: # For the first row in the dataset, we want to coerce types from # varchar(len(item)) athena default to to an unrestricted varchar, so # we'll create a table with one row - this make the recast faster, and diff --git a/cumulus_library/study_parser.py b/cumulus_library/study_parser.py index 71a6de00..7b1a4aa7 100644 --- a/cumulus_library/study_parser.py +++ b/cumulus_library/study_parser.py @@ -1,21 +1,18 @@ """ Contains classes for loading study data based on manifest.toml files """ import csv -import inspect import importlib.util +import inspect import pathlib import sys -from typing import List, Optional - import toml - from rich.progress import Progress, TaskID, track from cumulus_library import ( __version__, - base_utils, base_table_builder, + base_utils, databases, enums, errors, @@ -24,8 +21,6 @@ from cumulus_library.statistics import psm from cumulus_library.template_sql import base_templates -StrList = List[str] - class StudyManifestParser: """Handles loading of study data from manifest files. @@ -38,18 +33,17 @@ class StudyManifestParser: future. """ - _study_path = None - _study_config = {} - def __init__( self, - study_path: Optional[pathlib.Path] = None, - data_path: Optional[pathlib.Path] = None, + study_path: pathlib.Path | None = None, + data_path: pathlib.Path | None = None, ): """Instantiates a StudyManifestParser. :param study_path: A pathlib Path object, optional """ + self._study_path = None + self._study_config = {} if study_path is not None: self.load_study_manifest(study_path) self.data_path = data_path @@ -75,19 +69,19 @@ def load_study_manifest(self, study_path: pathlib.Path) -> None: ) self._study_config = config self._study_path = study_path - except FileNotFoundError: + except FileNotFoundError as e: raise errors.StudyManifestFilesystemError( # pylint: disable=raise-missing-from f"Missing or invalid manifest found at {study_path}" - ) + ) from e - def get_study_prefix(self) -> Optional[str]: + def get_study_prefix(self) -> str | None: """Reads the name of a study prefix from the in-memory study config :returns: A string of the prefix in the manifest, or None if not found """ return self._study_config.get("study_prefix") - def get_sql_file_list(self, continue_from: str = None) -> Optional[StrList]: + def get_sql_file_list(self, continue_from: str | None = None) -> list[str] | None: """Reads the contents of the sql_config array from the manifest :returns: An array of sql files from the manifest, or None if not found. @@ -105,7 +99,7 @@ def get_sql_file_list(self, continue_from: str = None) -> Optional[StrList]: ) return sql_files - def get_table_builder_file_list(self) -> Optional[StrList]: + def get_table_builder_file_list(self) -> list[str] | None: """Reads the contents of the table_builder_config array from the manifest :returns: An array of sql files from the manifest, or None if not found. 
@@ -113,7 +107,7 @@ def get_table_builder_file_list(self) -> Optional[StrList]: sql_config = self._study_config.get("table_builder_config", {}) return sql_config.get("file_names", []) - def get_counts_builder_file_list(self) -> Optional[StrList]: + def get_counts_builder_file_list(self) -> list[str] | None: """Reads the contents of the counts_builder_config array from the manifest :returns: An array of sql files from the manifest, or None if not found. @@ -121,15 +115,16 @@ def get_counts_builder_file_list(self) -> Optional[StrList]: sql_config = self._study_config.get("counts_builder_config", {}) return sql_config.get("file_names", []) - def get_statistics_file_list(self) -> Optional[StrList]: + def get_statistics_file_list(self) -> list[str] | None: """Reads the contents of the statistics_config array from the manifest - :returns: An array of statistics toml files from the manifest, or None if not found. + :returns: An array of statistics toml files from the manifest, + or None if not found. """ stats_config = self._study_config.get("statistics_config", {}) return stats_config.get("file_names", []) - def get_export_table_list(self) -> Optional[StrList]: + def get_export_table_list(self) -> list[str] | None: """Reads the contents of the export_list array from the manifest :returns: An array of tables to export from the manifest, or None if not found. @@ -208,8 +203,8 @@ def clean_study( schema_name: str, stats_clean: bool = False, verbose: bool = False, - prefix: str = None, - ) -> List: + prefix: str | None = None, + ) -> list: """Removes tables beginning with the study prefix from the database schema :param cursor: A DatabaseCursor object @@ -296,7 +291,7 @@ def _execute_drop_queries( self, cursor: databases.DatabaseCursor, verbose: bool, - view_table_list: List, + view_table_list: list, progress: Progress, task: TaskID, ) -> None: @@ -304,7 +299,8 @@ def _execute_drop_queries( :param cursor: A DatabaseCursor object :param verbose: toggle from progress bar to query output - :param view_table_list: a list of views and tables beginning with the study prefix + :param view_table_list: a list of views and tables beginning with + the study prefix :param progress: a rich progress bar renderer :param task: a TaskID for a given progress bar """ @@ -326,7 +322,7 @@ def _load_and_execute_builder( drop_table: bool = False, parser: databases.DatabaseParser = None, write_reference_sql: bool = False, - doc_str: str = None, + doc_str: str | None = None, ) -> None: """Loads a table builder from a file. @@ -336,7 +332,7 @@ def _load_and_execute_builder( As with eating an ortolan, you may wish to cover your head with a cloth. Per an article on the subject: "Tradition dictates that this is to shield - – from God’s eyes – the shame of such a decadent and disgraceful act." + - from God's eyes - the shame of such a decadent and disgraceful act." 
""" spec = importlib.util.spec_from_file_location( @@ -528,7 +524,7 @@ def run_generate_sql( parser: databases.DatabaseParser = None, verbose: bool = False, ) -> None: - """Generates reference SQL from all BaseTableBuilder-derived classes in the manifest + """Generates reference SQL from builders listed in the manifest :param cursor: A DatabaseCursor object :param schema: The name of the schema to write tables to @@ -540,10 +536,10 @@ def run_generate_sql( + self.get_statistics_file_list() ) doc_str = ( - "-- This sql was autogenerated as a reference example using the library CLI.\n" - "-- Its format is tied to the specific database it was run against, and it may not\n" - "-- be correct for all databases. Use the CLI's build option to derive the best SQL\n" - "-- for your dataset." + "-- This sql was autogenerated as a reference example using the library\n" + "-- CLI.Its format is tied to the specific database it was run against,\n" + "-- and it may not be correct for all databases. Use the CLI's build \n" + "-- option to derive the best SQL for your dataset." ) for file in all_generators: self._load_and_execute_builder( @@ -560,8 +556,8 @@ def build_study( self, cursor: databases.DatabaseCursor, verbose: bool = False, - continue_from: str = None, - ) -> List: + continue_from: str | None = None, + ) -> list: """Creates tables in the schema by iterating through the sql_config.file_names :param cursor: A DatabaseCursor object @@ -593,9 +589,10 @@ def build_study( ) return queries - def _query_error(self, query_and_filename: List, exit_message: str) -> None: + def _query_error(self, query_and_filename: list, exit_message: str) -> None: print( - f"An error occured executing the following query in {query_and_filename[1]}:", + "An error occured executing the following query in ", + f"{query_and_filename[1]}:", file=sys.stderr, ) print("--------", file=sys.stderr) @@ -637,8 +634,8 @@ def _execute_build_queries( "This query contains a table name which contains a reserved word " "immediately after the study prefix. 
Please rename this table so " "that it does not begin with one of these special words " - "immediately after the double undescore.\n" - f"Reserved words: {str(word.value for word in enums.ProtectedTableKeywords)}", + "immediately after the double underscore.\nReserved words: " + f"{', '.join(word.value for word in enums.ProtectedTableKeywords)}", ) if create_line.count("__") > 1: self._query_error( @@ -669,7 +666,7 @@ def _execute_build_queries( def export_study( self, db: databases.DatabaseBackend, data_path: pathlib.Path - ) -> List: + ) -> list: """Exports csvs/parquet extracts of tables listed in export_list :param db: A database backend @@ -683,7 +680,7 @@ def export_study( ): query = f"select * from {table}" dataframe = db.execute_as_pandas(query) - path = pathlib.Path(f"{str(data_path)}/{self.get_study_prefix()}/") + path = pathlib.Path(f"{data_path}/{self.get_study_prefix()}/") path.mkdir(parents=True, exist_ok=True) dataframe = dataframe.sort_values( by=list(dataframe.columns), ascending=False, na_position="first" diff --git a/cumulus_library/template_sql/base_templates.py b/cumulus_library/template_sql/base_templates.py index cff0f4ac..8b464d1f 100644 --- a/cumulus_library/template_sql/base_templates.py +++ b/cumulus_library/template_sql/base_templates.py @@ -2,12 +2,10 @@ from enum import Enum from pathlib import Path -from typing import Dict, List, Optional from jinja2 import Template from pandas import DataFrame -from cumulus_library import databases from cumulus_library.template_sql import sql_utils PATH = Path(__file__).parent @@ -71,7 +69,7 @@ def get_codeable_concept_denormalize_query( ) -def get_column_datatype_query(schema_name: str, table_name: str, column_names: List): +def get_column_datatype_query(schema_name: str, table_name: str, column_names: list): with open(f"{PATH}/column_datatype.sql.jinja") as column_datatype: return Template(column_datatype.read()).render( schema_name=schema_name, @@ -81,7 +79,7 @@ def get_column_datatype_query(schema_name: str, table_name: str, column_names: L def get_create_view_query( - view_name: str, dataset: List[List[str]], view_cols: List[str] + view_name: str, dataset: list[list[str]], view_cols: list[str] ) -> str: """Generates a create view as query for inserting static data into athena @@ -98,7 +96,7 @@ def get_create_view_query( def get_ctas_query( - schema_name: str, table_name: str, dataset: List[List[str]], table_cols: List[str] + schema_name: str, table_name: str, dataset: list[list[str]], table_cols: list[str] ) -> str: """Generates a create table as query for inserting static data into athena @@ -139,8 +137,8 @@ def get_ctas_query_from_df(schema_name: str, table_name: str, df: DataFrame) -> def get_ctas_empty_query( schema_name: str, table_name: str, - table_cols: List[str], - table_cols_types: List[str] = None, + table_cols: list[str], + table_cols_types: list[str] | None = None, ) -> str: """Generates a create table as query for initializing an empty table @@ -152,7 +150,8 @@ def get_ctas_empty_query( :param schema_name: The athena schema to create the table in :param table_name: The name of the athena table to create :param table_cols: Comma delineated column names, i.e. 
['first,second'] - :param table_cols_types: Allows specifying a data type per column (default: all varchar) + :param table_cols_types: Allows specifying a data type per column + (default: all varchar) """ if not table_cols_types: table_cols_types = ["varchar"] * len(table_cols) @@ -201,9 +200,9 @@ def get_extension_denormalize_query(config: sql_utils.ExtensionConfig) -> str: def get_insert_into_query( table_name: str, - table_cols: List[str], - dataset: List[List[str]], - type_casts: Dict = {}, + table_cols: list[str], + dataset: list[list[str]], + type_casts: dict | None = None, ) -> str: """Generates an insert query for adding data to an existing athena table @@ -212,6 +211,8 @@ def get_insert_into_query( :param table_cols: Comma deleniated column names, i.e. ['first','second'] :param dataset: Array of data arrays to insert, i.e. [['1','3'],['2','4']] """ + if type_casts is None: + type_casts = {} with open(f"{PATH}/insert_into.sql.jinja") as insert_into: return Template(insert_into.read()).render( table_name=table_name, @@ -224,9 +225,13 @@ def get_insert_into_query( def get_is_table_not_empty_query( source_table: str, field: str, - unnests: Optional[list[dict]] = [], - conditions: Optional[list[str]] = [], + unnests: list[dict] | None = None, + conditions: list[str] | None = None, ): + if unnests is None: + unnests = [] + if conditions is None: + conditions = [] with open(f"{PATH}/is_table_not_empty.sql.jinja") as is_table_not_empty: return Template(is_table_not_empty.read()).render( source_table=source_table, diff --git a/cumulus_library/template_sql/sql_utils.py b/cumulus_library/template_sql/sql_utils.py index 2664be56..797bc330 100644 --- a/cumulus_library/template_sql/sql_utils.py +++ b/cumulus_library/template_sql/sql_utils.py @@ -12,11 +12,9 @@ from dataclasses import dataclass import duckdb -from typing import List -from cumulus_library import base_utils +from cumulus_library import base_utils, databases from cumulus_library.template_sql import base_templates -from cumulus_library import databases @dataclass(kw_only=True) @@ -44,7 +42,7 @@ class CodeableConceptConfig: @dataclass -class ExtensionConfig(object): +class ExtensionConfig: """convenience class for holding parameters for generating extension tables. 
:param source_table: the table to extract extensions from @@ -61,14 +59,14 @@ class ExtensionConfig(object): target_table: str target_col_prefix: str fhir_extension: str - ext_systems: List[str] + ext_systems: list[str] is_array: bool = False def _check_data_in_fields( schema, cursor, - code_sources: List[CodeableConceptConfig], + code_sources: list[CodeableConceptConfig], ) -> dict: """checks if CodeableConcept fields actually have data available @@ -112,7 +110,7 @@ def _check_data_in_fields( def denormalize_codes( schema: str, cursor: databases.DatabaseCursor, - code_sources: List[CodeableConceptConfig], + code_sources: list[CodeableConceptConfig], ): queries = [] code_sources = _check_data_in_fields(schema, cursor, code_sources) @@ -284,7 +282,7 @@ def _check_schema_if_exists( if coding_element in schema_str: return False else: - required_fields = [coding_element] + ["code", "system", "display"] + required_fields = [coding_element, "code", "system", "display"] if any(x not in schema_str for x in required_fields): return False return True diff --git a/cumulus_library/template_sql/statistics/counts_templates.py b/cumulus_library/template_sql/statistics/counts_templates.py index 32159062..c32200d5 100644 --- a/cumulus_library/template_sql/statistics/counts_templates.py +++ b/cumulus_library/template_sql/statistics/counts_templates.py @@ -1,7 +1,6 @@ -from enum import Enum from dataclasses import dataclass +from enum import Enum from pathlib import Path -from typing import Optional from jinja2 import Template @@ -39,9 +38,9 @@ def get_count_query( source_table: str, table_cols: list, min_subject: int = 10, - where_clauses: Optional[list] = None, - fhir_resource: Optional[str] = None, - filter_resource: Optional[bool] = True, + where_clauses: list | None = None, + fhir_resource: str | None = None, + filter_resource: bool | None = True, ) -> str: """Generates count tables for generating study outputs""" path = Path(__file__).parent diff --git a/cumulus_library/template_sql/statistics/psm_templates.py b/cumulus_library/template_sql/statistics/psm_templates.py index 41a05932..a91f5bb9 100644 --- a/cumulus_library/template_sql/statistics/psm_templates.py +++ b/cumulus_library/template_sql/statistics/psm_templates.py @@ -1,17 +1,17 @@ """ Collection of jinja template getters for common SQL queries """ -from enum import Enum from pathlib import Path -from typing import Dict, List, Optional from jinja2 import Template -from pandas import DataFrame from cumulus_library.errors import CumulusLibraryError def get_distinct_ids( - columns: list[str], source_table: str, join_id: str = None, filter_table: str = None + columns: list[str], + source_table: str, + join_id: str | None = None, + filter_table: str | None = None, ) -> str: """Gets distinct ids from a table, optionally filtering by ids in another table @@ -23,12 +23,14 @@ def get_distinct_ids( :param columns: a list of ids to request :param source_table: the table to retrieve ids from - :param join_id: the id column to use for joining. Expected to exist in both source and filter tables + :param join_id: the id column to use for joining. 
Expected to exist in both source and filter tables :param filter_table: a table containing ids you want to exclude """ if (join_id and not filter_table) or (not join_id and filter_table): raise CumulusLibraryError( - "psm_templates.get_distinct_ids expects all optional parameters to be defined if supplied" + "psm_templates.get_distinct_ids expects all optional parameters " + "to be defined if supplied" ) path = Path(__file__).parent @@ -49,8 +51,8 @@ def get_create_covariate_table( primary_ref: str, dependent_variable: str, join_cols_by_table: dict, - count_ref: str = None, - count_table: str = None, + count_ref: str | None = None, + count_table: str | None = None, ) -> str: """Gets a query to create a covariate table for PSM analysis @@ -68,7 +70,8 @@ """ if (count_ref and not count_table) or (not count_ref and count_table): raise CumulusLibraryError( - "psm_templates.get_create_covariate_table expects all count parameters to be defined if supplied" + "psm_templates.get_create_covariate_table expects all count parameters " + "to be defined if supplied" ) path = Path(__file__).parent diff --git a/cumulus_library/upload.py b/cumulus_library/upload.py index 21edbdd0..b0489e02 100644 --- a/cumulus_library/upload.py +++ b/cumulus_library/upload.py @@ -1,11 +1,9 @@ """ Handles pushing data to the aggregator""" import sys - from pathlib import Path import requests - from pandas import read_parquet from rich.progress import Progress, TaskID diff --git a/pyproject.toml b/pyproject.toml index 8f6254ff..f9459556 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,9 +30,8 @@ classifiers = [ dynamic=["version"] [project.optional-dependencies] dev = [ - "black <25, >=24", + "ruff == 0.2.1", "pre-commit", - "pylint", ] test = [ "freezegun", @@ -55,3 +54,24 @@ build-backend = "flit_core.buildapi" [tool.flit.sdist] include = [".sqlfluff"] + +[tool.ruff] +target-version = "py310" + +lint.select = [ + "A", # prevent using keywords that clobber python builtins + "B", # bugbear: likely bugs and design problems + "E", # pycodestyle + "F", # pyflakes + "I", # isort + "ISC", # implicit string concatenation + "PLE", # pylint errors + "RUF", # the ruff developer's own rules + "UP", # alert you when better syntax is available in your python version +] +lint.ignore = [ + "ISC001" # Recommended ignore from `ruff format` +] + +[tool.ruff.lint.per-file-ignores] +"cumulus_library/schema/valueset.py" = ["E501"] diff --git a/tests/conftest.py b/tests/conftest.py index 3a5dc091..5b953fe7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,18 +2,14 @@ import copy import json -import os -import tempfile - -from enum import IntEnum from pathlib import Path +import numpy import pandas import pytest -import numpy from cumulus_library.cli import StudyRunner -from cumulus_library.databases import create_db_backend, DatabaseCursor +from cumulus_library.databases import DatabaseCursor, create_db_backend MOCK_DATA_DIR = f"{Path(__file__).parent}/test_data/duckdb_data" ID_PATHS = { @@ -60,7 +56,8 @@ def duckdb_args(args: list, tmp_path, stats=False): target = f"{MOCK_DATA_DIR}" if args[0] == "build": - return args + [ + return [ + *args, "--db-type", "duckdb", "--load-ndjson-dir", f"{target}", f"{tmp_path}/duck.db", ] elif args[0] == "export": - return args + [ + return [ + *args, "--db-type", "duckdb", "--database", f"{tmp_path}/duck.db", f"{tmp_path}/counts", ] - return args + ["--db-type", "duckdb", "--database", f"{tmp_path}/duck.db"] 
+ return [*args, "--db-type", "duckdb", "--database", f"{tmp_path}/duck.db"] def ndjson_data_generator(source_dir: Path, target_dir: Path, iterations: int): @@ -121,7 +119,7 @@ def update_nested_obj(id_path, obj, i): # panda's deep copy is not recursive, so we have to do it # again for nested objects df[id_path[0]] = df[id_path[0]].map( - lambda x: update_nested_obj( + lambda x, i=i, id_path=id_path: update_nested_obj( id_path[1:], copy.deepcopy(x), i ) ) diff --git a/tests/regression/run_regression.py b/tests/regression/run_regression.py index 5cd5049e..60691e3b 100644 --- a/tests/regression/run_regression.py +++ b/tests/regression/run_regression.py @@ -16,7 +16,6 @@ import os import sys - from pathlib import Path from pandas import read_parquet @@ -32,8 +31,8 @@ export_missing = exports - references sys.exit( "❌ Found differences in files present: ❌\n" - f"Files present in reference not in export: {str(ref_missing)}\n" - f"Files present in export not in reference: {str(export_missing)}" + f"Files present in reference not in export: {ref_missing!s}\n" + f"Files present in export not in reference: {export_missing!s}" ) diffs = [] for filename in references: @@ -59,7 +58,7 @@ diffs.append( [ filename, - ("Rows differ between reference and export:\n" f"{compared}"), + ("Rows differ between reference and export:\n", f"{compared}"), ] ) if len(diffs) > 0: diff --git a/tests/test_cli.py b/tests/test_cli.py index 825298b2..6560d287 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -4,10 +4,8 @@ import glob import os import shutil -import sysconfig - from contextlib import nullcontext as does_not_raise -from pathlib import Path, PosixPath +from pathlib import Path from unittest import mock import pytest @@ -29,7 +27,8 @@ def open(self, *args, **kwargs): if str(args[0]).endswith(".bsv"): args = ( Path( - f"./tests/test_data/mock_bsvs/{str(args[0]).rsplit('/', maxsplit=1)[-1]}" + "./tests/test_data/mock_bsvs/", + f"{str(args[0]).rsplit('/', maxsplit=1)[-1]}", ), "r", ) @@ -101,11 +100,12 @@ def test_cli_early_exit(args): ), ], ) -def test_cli_path_mapping( - mock_load_json, mock_path, tmp_path, args, raises, expected -): # pylint: disable=unused-argument +def test_cli_path_mapping(mock_load_json, mock_path, tmp_path, args, raises, expected): # pylint: disable=unused-argument with raises: - mock_path.return_value = f"{Path(__file__).resolve().parents[0]}" "/test_data/" + mock_path.return_value = ( + f"{Path(__file__).resolve().parents[0]}", + "/test_data/", + ) mock_load_json.return_value = { "__desc__": "", "allowlist": { @@ -124,7 +124,7 @@ def test_cli_path_mapping( ) @mock.patch("sysconfig.get_path") def test_count_builder_mapping(mock_path, tmp_path): - mock_path.return_value = f"{Path(__file__).resolve().parents[0]}" "/test_data/" + mock_path.return_value = (f"{Path(__file__).resolve().parents[0]}", "/test_data/") with does_not_raise(): args = duckdb_args( [ @@ -177,7 +177,7 @@ def test_generate_sql(mock_path, tmp_path): assert "module1.sql" in ",".join(files) for file in files: if file.endswith("module1.sql"): - with open(file, "r") as f: + with open(file) as f: query = "\n".join(line.rstrip() for line in f) assert "This sql was autogenerated" in query assert "CREATE TABLE IF NOT EXISTS study_python_valid__table" in query @@ -220,15 +220,20 @@ def test_generate_sql(mock_path, tmp_path): ], ) def test_clean(mock_path, tmp_path, args, expected): # pylint: disable=unused-argument - mock_path.return_value = f"{Path(__file__).resolve().parents[0]}" "/test_data/" + mock_path.return_value = 
(f"{Path(__file__).resolve().parents[0]}", "/test_data/") cli.main( cli_args=duckdb_args(["build", "-t", "core", "--database", "test"], tmp_path) ) with does_not_raise(): with mock.patch.object(builtins, "input", lambda _: "y"): cli.main( - cli_args=args - + ["--db-type", "duckdb", "--database", f"{tmp_path}/duck.db"] + cli_args=[ + *args, + "--db-type", + "duckdb", + "--database", + f"{tmp_path}/duck.db", + ] ) db = DuckDatabaseBackend(f"{tmp_path}/duck.db") for table in db.cursor().execute("show tables").fetchall(): @@ -245,7 +250,9 @@ def test_clean(mock_path, tmp_path, args, expected): # pylint: disable=unused-a [ (["build", "-t", "core"], ["export", "-t", "core"], 44), ( - [ # checking that a study is loaded from a child directory of a user-defined path + # checking that a study is loaded from a child directory + # of a user-defined path + [ "build", "-t", "study_valid", @@ -257,8 +264,9 @@ def test_clean(mock_path, tmp_path, args, expected): # pylint: disable=unused-a ), (["build", "-t", "vocab"], None, 3), ( - [ # checking that a study is loaded from the directory of a user-defined path. - # we're also validating that the CLI accepts the statistics keyword, though + # checking that a study is loaded from the directory of a user-defined + # path. we're also validating that the CLI accepts the statistics keyword + [ "build", "-t", "study_valid", @@ -338,7 +346,7 @@ def test_cli_transactions(tmp_path, study, finishes, raises): .fetchall() ) query = ( - db.cursor().execute(f"SELECT * from study_valid__lib_transactions").fetchall() + db.cursor().execute("SELECT * from study_valid__lib_transactions").fetchall() ) assert query[1][2] == "started" if finishes: @@ -377,9 +385,9 @@ def test_cli_stats_rebuild(tmp_path): "--database", f"{tmp_path}/duck.db", ] - cli.main(cli_args=arg_list + [f"{tmp_path}/export"]) - cli.main(cli_args=arg_list + [f"{tmp_path}/export"]) - cli.main(cli_args=arg_list + [f"{tmp_path}/export", "--statistics"]) + cli.main(cli_args=[*arg_list, f"{tmp_path}/export"]) + cli.main(cli_args=[*arg_list, f"{tmp_path}/export"]) + cli.main(cli_args=[*arg_list, f"{tmp_path}/export", "--statistics"]) db = DuckDatabaseBackend(f"{tmp_path}/duck.db") expected = ( db.cursor() diff --git a/tests/test_conftest.py b/tests/test_conftest.py index 7eeb68c7..7daf0daa 100644 --- a/tests/test_conftest.py +++ b/tests/test_conftest.py @@ -1,8 +1,7 @@ import json - from pathlib import Path -from tests.conftest import ndjson_data_generator, MOCK_DATA_DIR, ID_PATHS +from tests.conftest import ID_PATHS, MOCK_DATA_DIR, ndjson_data_generator def test_ndjson_data_generator(tmp_path): @@ -15,17 +14,17 @@ def test_ndjson_data_generator(tmp_path): for filepath in [f for f in Path(target / key).iterdir()]: with open(filepath) as f: first_new = json.loads(next(f)) - for line in f: - pass - last_new = json.loads(line) + *_, last_new = f + last_new = json.loads(last_new) with open(f"{Path(MOCK_DATA_DIR)}/{key}/{filepath.name}") as f: first_line = next(f) first_ref = json.loads(first_line) # handling patient file of length 1: - line = first_line - for line in f: - pass - last_ref = json.loads(line) + try: + *_, last_ref = f + last_ref = json.loads(last_ref) + except Exception: + last_ref = first_ref for source in [[first_new, first_ref, 0], [last_new, last_ref, iters - 1]]: for id_path in ID_PATHS[key]: new_test = source[0] diff --git a/tests/test_core.py b/tests/test_core.py index b7705400..ced53f69 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,7 +1,6 @@ """unit tests for counts generation""" 
-import datetime # pylint: disable=unused-import - +import datetime # noqa: F401 from pathlib import Path import pytest @@ -14,7 +13,7 @@ def get_sorted_table_data(cursor, table): num_cols = cursor.execute( - "SELECT count(*) FROM information_schema.columns " f"WHERE table_name='{table}'" + f"SELECT count(*) FROM information_schema.columns WHERE table_name='{table}'" ).fetchone()[0] return cursor.execute( f"SELECT * FROM '{table}' ORDER BY " f"{','.join(map(str, range(1,num_cols)))}" @@ -53,7 +52,7 @@ def test_core_tables(mock_db_core, table): # for row in table_rows: # f.write(str(f"{row}\n")) - with open(f"./tests/test_data/core/{table}.txt", "r", encoding="UTF-8") as f: + with open(f"./tests/test_data/core/{table}.txt", encoding="UTF-8") as f: ref_table = [] for row in f.readlines(): ref_table.append(eval(row)) # pylint: disable=eval-used @@ -92,7 +91,6 @@ def test_core_count_missing_data(tmp_path, mock_db): # f.write(str(f"{row}\n")) with open( "./tests/test_data/core/core__count_encounter_month_missing_data.txt", - "r", encoding="UTF-8", ) as f: ref_table = [] diff --git a/tests/test_duckdb.py b/tests/test_duckdb.py index 09fe41e4..517463ac 100644 --- a/tests/test_duckdb.py +++ b/tests/test_duckdb.py @@ -3,10 +3,9 @@ import glob import os import tempfile - from datetime import datetime, timedelta, timezone -from unittest import mock from pathlib import Path +from unittest import mock import pytest diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 4ffe6540..21a0e91e 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -1,12 +1,10 @@ """Test for FHIR resource parsers""" -from contextlib import nullcontext as does_not_raise import json - +from contextlib import nullcontext as does_not_raise from unittest.mock import mock_open, patch import pytest - from fhirclient.models.coding import Coding from cumulus_library.parsers import fhir_valueset diff --git a/tests/test_psm.py b/tests/test_psm.py index dbfc9ac0..53b7990e 100644 --- a/tests/test_psm.py +++ b/tests/test_psm.py @@ -4,7 +4,6 @@ from pathlib import Path import pytest - from freezegun import freeze_time from cumulus_library.cli import StudyRunner diff --git a/tests/test_psm_templates.py b/tests/test_psm_templates.py index cb6d0c9e..7a1aeab8 100644 --- a/tests/test_psm_templates.py +++ b/tests/test_psm_templates.py @@ -6,8 +6,8 @@ from cumulus_library.errors import CumulusLibraryError from cumulus_library.template_sql.statistics.psm_templates import ( - get_distinct_ids, get_create_covariate_table, + get_distinct_ids, ) diff --git a/tests/test_study_parser.py b/tests/test_study_parser.py index 66ee0699..f6af4c29 100644 --- a/tests/test_study_parser.py +++ b/tests/test_study_parser.py @@ -2,7 +2,6 @@ import builtins import pathlib - from contextlib import nullcontext as does_not_raise from pathlib import Path from unittest import mock @@ -135,7 +134,7 @@ def test_clean_study(mock_db, schema, verbose, prefix, confirm, stats, target, r ) remaining_tables = ( mock_db.cursor() - .execute(f"select distinct(table_name) from information_schema.tables") + .execute("select distinct(table_name) from information_schema.tables") .fetchall() ) if any(x in target for x in protected_strs): @@ -295,13 +294,13 @@ def test_build_study(mock_db, study_path, verbose, expects, raises): ( "./tests/test_data/psm/", False, - (f"psm_test__psm_encounter_covariate",), + ("psm_test__psm_encounter_covariate",), does_not_raise(), ), ( "./tests/test_data/psm/", True, - (f"psm_test__psm_encounter_covariate",), + 
("psm_test__psm_encounter_covariate",), does_not_raise(), ), ], diff --git a/tests/test_template_sql_utils.py b/tests/test_template_sql_utils.py index 0c44614f..2136cc9c 100644 --- a/tests/test_template_sql_utils.py +++ b/tests/test_template_sql_utils.py @@ -1,9 +1,9 @@ """ tests for the cli interface to studies """ -import duckdb +from contextlib import nullcontext as does_not_raise + import pytest -from contextlib import nullcontext as does_not_raise from cumulus_library.template_sql import sql_utils diff --git a/tests/test_templates.py b/tests/test_templates.py index 6da24bcf..ab89b3ec 100644 --- a/tests/test_templates.py +++ b/tests/test_templates.py @@ -1,7 +1,6 @@ """ tests for jinja sql templates """ import pytest - from pandas import DataFrame from cumulus_library.template_sql import base_templates, sql_utils From 97139cb8737afa608ac9b0c29cae81537881bfec Mon Sep 17 00:00:00 2001 From: Matt Garber Date: Tue, 6 Feb 2024 15:06:59 -0500 Subject: [PATCH 2/7] cleanup ci syntax --- .github/workflows/ci.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 2c017949..0ca2b5b1 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -50,7 +50,8 @@ jobs: - name: Run ruff if: success() || failure() # still run black if above checks fails run: | - ruff + ruff check + ruff format --check regression: runs-on: ubuntu-22.04 permissions: From 8f8c15f729a266805fb3651a54229295bb8124bb Mon Sep 17 00:00:00 2001 From: Matt Garber Date: Wed, 7 Feb 2024 08:59:56 -0500 Subject: [PATCH 3/7] PR feedback --- .pre-commit-config.yaml | 10 ++-- cumulus_library/base_table_builder.py | 3 +- cumulus_library/cli.py | 3 +- .../studies/discovery/code_detection.py | 50 ++++++++++--------- .../template_sql/base_templates.py | 9 ++-- pyproject.toml | 7 ++- tests/regression/run_regression.py | 2 +- tests/test_cli.py | 11 ++-- tests/test_conftest.py | 2 +- 9 files changed, 50 insertions(+), 47 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 84a4db32..a3ead3e8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,11 +2,13 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.2.1 hooks: - - id: ruff - args: [--fix] - - id: ruff-format + - name: Ruff formatting + id: ruff-format + - name: Ruff linting + id: ruff + stages: [pre-push] - repo: https://github.com/sqlfluff/sqlfluff rev: 2.3.4 hooks: - - id: sqlfluff-lint + - id: sqlfluff-lint \ No newline at end of file diff --git a/cumulus_library/base_table_builder.py b/cumulus_library/base_table_builder.py index 0786bf4a..877e77a0 100644 --- a/cumulus_library/base_table_builder.py +++ b/cumulus_library/base_table_builder.py @@ -122,9 +122,10 @@ def comment_queries(self, doc_str=None): self.queries = commented_queries def write_queries(self, path: pathlib.Path | None = None): + """writes all queries constructed by prepare_queries to disk""" if path is None: path = pathlib.Path.cwd() / "output.sql" - """writes all queries constructed by prepare_queries to disk""" + path.parents[0].mkdir(parents=True, exist_ok=True) with open(path, "w", encoding="utf-8") as file: for query in self.queries: diff --git a/cumulus_library/cli.py b/cumulus_library/cli.py index 6c3ec07f..df63b6ab 100755 --- a/cumulus_library/cli.py +++ b/cumulus_library/cli.py @@ -264,9 +264,8 @@ def get_study_dict(alt_dir_paths: list) -> dict[str, pathlib.Path] | None: pathlib.Path(cli_path, "./module_allowlist.json"), encoding="utf-8" ) as 
study_allowlist_json:
         study_allowlist = json.load(study_allowlist_json)["allowlist"]
-    site_packages_dir = "".join(sysconfig.get_path("purelib"))
+    site_packages_dir = sysconfig.get_path("purelib")
     for study, subdir in study_allowlist.items():
-        print(site_packages_dir)
         study_path = pathlib.Path(site_packages_dir, subdir)
         if study_path.exists():
             manifest_studies[study] = study_path
diff --git a/cumulus_library/studies/discovery/code_detection.py b/cumulus_library/studies/discovery/code_detection.py
index 5e8c449b..f73d96d7 100644
--- a/cumulus_library/studies/discovery/code_detection.py
+++ b/cumulus_library/studies/discovery/code_detection.py
@@ -8,6 +8,30 @@
 class CodeDetectionBuilder(base_table_builder.BaseTableBuilder):
     display_text = "Selecting unique code systems..."
 
+    def _check_coding_against_db(self, code_source, schema, cursor):
+        """selects the appropriate DB query to run"""
+
+        if code_source["is_array"]:
+            return sql_utils.is_codeable_concept_array_populated(
+                schema,
+                code_source["table_name"],
+                code_source["column_name"],
+                cursor,
+            )
+        elif code_source["is_bare_coding"]:
+            return sql_utils.is_code_populated(
+                schema,
+                code_source["table_name"],
+                code_source["column_name"],
+                cursor,
+            )
+        return sql_utils.is_codeable_concept_populated(
+            schema,
+            code_source["table_name"],
+            code_source["column_name"],
+            cursor,
+        )
+
     def _check_codes_in_fields(self, code_sources: list[dict], schema, cursor) -> dict:
         """checks if Coding/CodeableConcept fields are present and populated"""
 
@@ -17,29 +41,9 @@
             total=len(code_sources),
         )
         for code_source in code_sources:
-            if code_source["is_array"]:
-                code_source[
-                    "has_data"
-                ] = sql_utils.is_codeable_concept_array_populated(
-                    schema,
-                    code_source["table_name"],
-                    code_source["column_name"],
-                    cursor,
-                )
-            elif code_source["is_bare_coding"]:
-                code_source["has_data"] = sql_utils.is_code_populated(
-                    schema,
-                    code_source["table_name"],
-                    code_source["column_name"],
-                    cursor,
-                )
-            else:
-                code_source["has_data"] = sql_utils.is_codeable_concept_populated(
-                    schema,
-                    code_source["table_name"],
-                    code_source["column_name"],
-                    cursor,
-                )
+            code_source["has_data"] = self._check_coding_against_db(
+                code_source, schema, cursor
+            )
             progress.advance(task)
 
         return code_sources
diff --git a/cumulus_library/template_sql/base_templates.py b/cumulus_library/template_sql/base_templates.py
index 8b464d1f..8d22fd86 100644
--- a/cumulus_library/template_sql/base_templates.py
+++ b/cumulus_library/template_sql/base_templates.py
@@ -211,8 +211,7 @@ def get_insert_into_query(
     :param table_cols: Comma deleniated column names, i.e. ['first','second']
     :param dataset: Array of data arrays to insert, i.e. [['1','3'],['2','4']]
     """
-    if type_casts is None:
-        type_casts = {}
+    type_casts = type_casts or {}
     with open(f"{PATH}/insert_into.sql.jinja") as insert_into:
         return Template(insert_into.read()).render(
             table_name=table_name,
@@ -228,10 +227,8 @@ def get_is_table_not_empty_query(
     unnests: list[dict] | None = None,
     conditions: list[str] | None = None,
 ):
-    if unnests is None:
-        unnests = []
-    if conditions is None:
-        conditions = []
+    unnests = unnests or []
+    conditions = conditions or []
     with open(f"{PATH}/is_table_not_empty.sql.jinja") as is_table_not_empty:
         return Template(is_table_not_empty.read()).render(
             source_table=source_table,
diff --git a/pyproject.toml b/pyproject.toml
index f9459556..993b4e58 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -58,7 +58,8 @@ include = [".sqlfluff"]
 [tool.ruff]
 target-version = "py310"
 
-lint.select = [
+[tool.ruff.lint]
+select = [
     "A", # prevent using keywords that clobber python builtins
     "B", # bugbear: security warnings
     "E", # pycodestyle
     "F", # pyflakes
     "I", # isort
     "ISC", # implicit string concatenation
     "PLE", # pylint errors
     "RUF", # the ruff developer's own rules
     "UP", # alert you when better syntax is available in your python version
 ]
-lint.ignore = [
+ignore = [
     "ISC001" # Recommended ignore from `ruff format`
 ]
 
 [tool.ruff.lint.per-file-ignores]
+# ./cumulus_library/schema needs general revisiting, deferring on style maintenance
+# until that occurs
 "cumulus_library/schema/valueset.py" = ["E501"]
diff --git a/tests/regression/run_regression.py b/tests/regression/run_regression.py
index 60691e3b..d141ce43 100644
--- a/tests/regression/run_regression.py
+++ b/tests/regression/run_regression.py
@@ -58,7 +58,7 @@
             diffs.append(
                 [
                     filename,
-                    ("Rows differ between reference and export:\n", f"{compared}"),
+                    f"Rows differ between reference and export:\n {compared}",
                 ]
             )
     if len(diffs) > 0:
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 6560d287..c03e0118 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -28,7 +28,7 @@ def open(self, *args, **kwargs):
             args = (
                 Path(
                     "./tests/test_data/mock_bsvs/",
-                    f"{str(args[0]).rsplit('/', maxsplit=1)[-1]}",
+                    str(args[0]).rsplit("/", maxsplit=1)[-1],
                 ),
                 "r",
             )
@@ -102,10 +102,7 @@
 )
 def test_cli_path_mapping(mock_load_json, mock_path, tmp_path, args, raises, expected):  # pylint: disable=unused-argument
     with raises:
-        mock_path.return_value = (
-            f"{Path(__file__).resolve().parents[0]}",
-            "/test_data/",
-        )
+        mock_path.return_value = f"{Path(__file__).resolve().parents[0]}/test_data/"
         mock_load_json.return_value = {
             "__desc__": "",
             "allowlist": {
@@ -124,7 +121,7 @@
 )
 @mock.patch("sysconfig.get_path")
 def test_count_builder_mapping(mock_path, tmp_path):
-    mock_path.return_value = (f"{Path(__file__).resolve().parents[0]}", "/test_data/")
+    mock_path.return_value = f"{Path(__file__).resolve().parents[0]}/test_data/"
     with does_not_raise():
         args = duckdb_args(
             [
@@ -220,7 +217,7 @@
     ],
 )
 def test_clean(mock_path, tmp_path, args, expected):  # pylint: disable=unused-argument
-    mock_path.return_value = (f"{Path(__file__).resolve().parents[0]}", "/test_data/")
+    mock_path.return_value = f"{Path(__file__).resolve().parents[0]}/test_data/"
     cli.main(
         cli_args=duckdb_args(["build", "-t", "core", "--database", "test"], tmp_path)
     )
diff --git a/tests/test_conftest.py b/tests/test_conftest.py
index 7daf0daa..97b118b7 100644
--- a/tests/test_conftest.py
+++ b/tests/test_conftest.py
@@ -23,7 +23,7 @@ def test_ndjson_data_generator(tmp_path):
             try:
                 *_, last_ref = f
                 last_ref = json.loads(last_ref)
-            except Exception:
+            except ValueError:
                 last_ref = first_ref
         for source in [[first_new, first_ref, 0], [last_new, last_ref, iters - 1]]:
             for id_path in ID_PATHS[key]:

From a9659e2ccec2d353f01b57b13bd902ec47b54669 Mon Sep 17 00:00:00 2001
From: Matt Garber
Date: Wed, 7 Feb 2024 10:18:22 -0500
Subject: [PATCH 4/7] docs tweak

---
 CONTRIBUTING.md | 2 +-
 pyproject.toml  | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 56452b54..d53f26cb 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -5,7 +5,7 @@ To use the same dev environment as us, you'll want to run these commands:
 
 ```sh
 pip install .[dev]
-pre-commit install
+pre-commit install --hook-type pre-commit --hook-type pre-push
 ```
 
 This will install dependencies & build tools,
diff --git a/pyproject.toml b/pyproject.toml
index 993b4e58..53bd53cb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -71,7 +71,9 @@ select = [
     "UP", # alert you when better syntax is available in your python version
 ]
 ignore = [
-    "ISC001" # Recommended ignore from `ruff format`
+# Recommended ignore from `ruff format` due to in-project conflicts with check.
+# It's expected that this will be fixed in the coming months.
+    "ISC001"
 ]
 
 [tool.ruff.lint.per-file-ignores]

From b5cab45f706d7c756852a76c71f424b6baf9b0b0 Mon Sep 17 00:00:00 2001
From: Matt Garber
Date: Wed, 7 Feb 2024 10:23:38 -0500
Subject: [PATCH 5/7] now with default install hook types

---
 .pre-commit-config.yaml | 1 +
 CONTRIBUTING.md         | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a3ead3e8..50436d83 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,3 +1,4 @@
+default_install_hook_types: [pre-commit, pre-push]
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.2.1
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index d53f26cb..56452b54 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -5,7 +5,7 @@ To use the same dev environment as us, you'll want to run these commands:
 
 ```sh
 pip install .[dev]
-pre-commit install --hook-type pre-commit --hook-type pre-push
+pre-commit install
 ```
 
 This will install dependencies & build tools,

From 64f78cd48d6aceff369d86711ea59c17b1eded9c Mon Sep 17 00:00:00 2001
From: Matt Garber
Date: Wed, 7 Feb 2024 10:45:24 -0500
Subject: [PATCH 6/7] type tweak

---
 cumulus_library/databases.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cumulus_library/databases.py b/cumulus_library/databases.py
index 17bc6ca5..0b592e45 100644
--- a/cumulus_library/databases.py
+++ b/cumulus_library/databases.py
@@ -44,7 +44,7 @@ def fetchall(self) -> list[list] | None:
 class DatabaseParser(abc.ABC):
     """Parses information_schema results from a database"""
 
-    def _parse_found_schema(self, expected: dict[dict[list]], schema: list[tuple]):
+    def _parse_found_schema(self, expected: dict[dict[list]], schema: list[list]):
         """Checks for presence of field for each column in a table
 
         :param expected: A nested dict describing the expected data format of

From 072e6ac74b70f1d657d7a9320b6bb40055ff1e03 Mon Sep 17 00:00:00 2001
From: Matt Garber
Date: Wed, 7 Feb 2024 11:10:52 -0500
Subject: [PATCH 7/7] type tweaks

---
 cumulus_library/databases.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cumulus_library/databases.py b/cumulus_library/databases.py
index 0b592e45..789d415d 100644
--- 
a/cumulus_library/databases.py +++ b/cumulus_library/databases.py @@ -181,7 +181,7 @@ def close(self) -> None: class AthenaParser(DatabaseParser): def validate_table_schema( - self, expected: dict[dict[list]], schema: list[tuple] + self, expected: dict[dict[list]], schema: list[list] ) -> bool: schema = dict(schema) return self._parse_found_schema(expected, schema)
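
A note on the string-splitting cleanups in this series (the `run_regression.py` and `test_cli.py` fixes in PATCH 3): when a long string is broken across lines to satisfy line-length rules, a trailing comma after the first fragment silently turns the pieces into a tuple rather than one string. The ISC ("implicit string concatenation") rules enabled in PATCH 1 police the flip side of the same language feature. Below is a minimal, self-contained sketch of the pitfall; `DemoError` is a hypothetical stand-in, not a class from this repo.

```python
class DemoError(Exception):
    """Hypothetical error class, for illustration only."""


# Buggy: the comma after the first fragment makes this a two-argument
# exception, so str(exc) renders the args tuple instead of one message.
try:
    raise DemoError(
        "expects all optional parameters ",
        "to be defined if supplied",
    )
except DemoError as exc:
    print(exc)  # ('expects all optional parameters ', 'to be defined if supplied')

# Fixed: adjacent string literals concatenate into a single message.
try:
    raise DemoError(
        "expects all optional parameters "
        "to be defined if supplied"
    )
except DemoError as exc:
    print(exc)  # expects all optional parameters to be defined if supplied
```

The series also swaps defaults like `path: pathlib.Path = pathlib.Path.cwd() / "output.sql"` for a `None` sentinel (`base_table_builder.py`, PATCH 1). A short sketch of why, again with illustrative names only:

```python
import pathlib


def write_v1(path: pathlib.Path = pathlib.Path.cwd() / "output.sql") -> None:
    # The default is computed once, at import time, so whatever cwd was in
    # effect when the module loaded is frozen into the signature.
    print(path)


def write_v2(path: pathlib.Path | None = None) -> None:
    # The default is computed per call, tracking the caller's current cwd,
    # which is what the patched write_queries does.
    if path is None:
        path = pathlib.Path.cwd() / "output.sql"
    print(path)
```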