feat(directory): publish studies (#4511)

* chore(directory): change env variable name
* chore(directory): include env example file
* feat(directory): publish studies
* fix(directory): studies before collections and facts in table order
* chore: create new release version of the publish script

---------

Co-authored-by: Hessel Haagsma <[email protected]>
molgenis · Dec 5, 2024 · da56d5c · da56d5c
1 parent 9fd49ec
commit da56d5c
Show file tree

Hide file tree

Showing 8 changed files with 50 additions and 36 deletions.
diff --git a/tools/directory/CHANGELOG.md b/tools/directory/CHANGELOG.md
@@ -1,4 +1,7 @@
 # Changelog
 
+## Version 1.1.0
+- Include publishing of the Studies table
+
 ## Version 1.0.0
 - EMX2 version of the Python tooling for a BBMRI Biobank Directory
diff --git a/tools/directory/dev/.env_example b/tools/directory/dev/.env_example
@@ -0,0 +1,5 @@
+TARGET="https://your-server"
+USERNAME=****
+PASSWORD=****
+DIRECTORY="name of the BBMRI-ERIC schema"
+NN_SCHEMA_PREFIX="National nodes are stored under two-letter (country) codes and might have a schema prefix"
diff --git a/tools/directory/setup.cfg b/tools/directory/setup.cfg
@@ -1,3 +1,13 @@
+[bumpversion]
+commit = False
+tag = False
+tag_name = {new_version}
+current_version = 1.1.0
+parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
+serialize =
+	{major}.{minor}.{patch}-{release}{build}
+	{major}.{minor}.{patch}
+
 [metadata]
 name = molgenis-emx2-directory-client
 description = MOLGENIS EMX2 Python tooling for a BBMRI Biobank Directory
@@ -7,10 +17,10 @@ license = LGPL-3.0-only
 long_description = file: README.md
 long_description_content_type = text/markdown; charset=UTF-8; variant=GFM
 url = https://github.com/molgenis/molgenis-emx2/tree/master/tools/directory/
-project_urls = 
+project_urls =
 	Source = https://github.com/molgenis/molgenis-emx2/tree/master/tools/directory/
 platforms = any
-classifiers = 
+classifiers =
 	License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)
 	Programming Language :: Python
 	Programming Language :: Python :: 3 :: Only
@@ -20,10 +30,10 @@ classifiers =
 zip_safe = False
 packages = find_namespace:
 include_package_data = True
-package_dir = 
+package_dir =
 	=src
 python_requires = >=3.10
-install_requires = 
+install_requires =
 	dataclasses
 	molgenis-emx2-pyclient>=11.23.0
 	requests>=2.21.0
@@ -32,24 +42,24 @@ install_requires =
 
 [options.packages.find]
 where = src
-exclude = 
+exclude =
 	tests
 
 [options.extras_require]
-testing = 
+testing =
 	setuptools
 	pytest
 	pytest-cov
 	pytest-asyncio
 	pytest-mock
 
 [tool:pytest]
-addopts = 
+addopts =
 	--cov molgenis_emx2.directory_client --cov-report term-missing
 	--verbose
 	--junitxml junit.xml
 asyncio_default_fixture_loop_scope = "function"
-norecursedirs = 
+norecursedirs =
 	dist
 	build
 	.tox
@@ -65,27 +75,17 @@ formats = bdist_wheel
 [flake8]
 max_line_length = 88
 extend_ignore = E203, W503
-exclude = 
+exclude =
 	.tox
 	build
 	dist
 	.eggs
 	docs/conf.py
 
-[bumpversion]
-commit = False
-tag = False
-tag_name = {new_version}
-current_version = 1.0.0
-parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
-serialize =
-	{major}.{minor}.{patch}-{release}{build}
-	{major}.{minor}.{patch}
-
 [bumpversion:part:release]
 optional_value = prod
 first_value = dev
-values = 
+values =
 	dev
 	prod
 
@@ -102,7 +102,7 @@ replace = __version__ = '{new_version}'
 [pyscaffold]
 version = 4.0.2
 package = directory_client
-extensions = 
+extensions =
 	markdown
 	namespace
 	pre_commit

diff --git a/tools/directory/setup.py b/tools/directory/setup.py
@@ -10,9 +10,7 @@
 
 if __name__ == "__main__":
     try:
-        setup(
-            version='1.0.0',
-            use_scm_version={"version_scheme": "no-guess-dev"})
+        setup(version="1.1.0", use_scm_version={"version_scheme": "no-guess-dev"})
     except:  # noqa
         print(
             "\n\nAn error occurred while building the project, "
@@ -21,4 +19,3 @@
             "   pip install -U setuptools setuptools_scm wheel\n\n"
         )
         raise
-
diff --git a/tools/directory/src/molgenis_emx2/directory_client/__init__.py b/tools/directory/src/molgenis_emx2/directory_client/__init__.py
@@ -15,4 +15,4 @@
 finally:
     del version, PackageNotFoundError
 
-__version__ = '1.0.0'
+__version__ = "1.1.0"
diff --git a/tools/directory/src/molgenis_emx2/directory_client/directory.py b/tools/directory/src/molgenis_emx2/directory_client/directory.py
@@ -93,6 +93,7 @@ def _init_state(self, nodes: List[Node], report: ErrorReport) -> PublishingState
                 biobanks=["id", "pid", "name", "national_node", "withdrawn"],
                 collections=["id", "national_node"],
                 facts=["id", "national_node"],
+                studies=["id", "national_node"],
             ),
         )
 

diff --git a/tools/directory/src/molgenis_emx2/directory_client/directory_client.py b/tools/directory/src/molgenis_emx2/directory_client/directory_client.py
@@ -37,6 +37,7 @@ class AttributesRequest:
     networks: List[str]
     also_known_in: List[str]
     biobanks: List[str]
+    studies: List[str]
     collections: List[str]
     facts: List[str]
 
@@ -319,7 +320,7 @@ def _reset_data_types(data: List, meta: MetaTable):
 
     def get_staging_node_data(self, node: Node) -> NodeData:
         """
-        Gets the six tables that belong to a single node's staging area.
+        Gets the tables that belong to a single node's staging area.
 
         :param Node node: the node to get the staging data for
         :return: a NodeData object
@@ -341,7 +342,7 @@ def get_staging_node_data(self, node: Node) -> NodeData:
 
     def get_published_node_data(self, node: Node) -> NodeData:
         """
-        Gets the six tables that belong to a single node from the published tables.
+        Gets the tables that belong to a single node from the published tables.
         Filters the rows based on the national_node field.
 
         :param Node node: the node to get the published data for
@@ -368,7 +369,7 @@ def get_published_data(
         self, nodes: List[Node], attributes: AttributesRequest
     ) -> MixedData:
         """
-        Gets the six tables that belong to one or more nodes from the Directory tables.
+        Gets the tables that belong to one or more nodes from the Directory tables.
         Filters the rows based on the national_node field.
 
         :param List[Node] nodes: the node(s) to get the Directory data for
@@ -404,7 +405,7 @@ def get_published_data(
 
     async def upload_data(self, schema: str, data: DirectoryData):
         """
-        Converts the six tables of a DirectoryData object to CSV, bundles them in
+        Converts the tables of a DirectoryData object to CSV, bundles them in
         a ZIP archive and imports them through the import API.
         :param schema: database where data should be uploaded into
         :param data: a DirectoryData object

diff --git a/tools/directory/src/molgenis_emx2/directory_client/model.py b/tools/directory/src/molgenis_emx2/directory_client/model.py
@@ -11,12 +11,13 @@
 
 
 class TableType(Enum):
-    """Enum representing the six tables each national node has."""
+    """Enum representing the tables each national node has."""
 
     PERSONS = "persons"
     ALSO_KNOWN = "also_known_in"
     NETWORKS = "networks"
     BIOBANKS = "biobanks"
+    STUDIES = "studies"
     COLLECTIONS = "collections"
     FACTS = "facts"
 
@@ -207,12 +208,13 @@ class Node:
         TableType.NETWORKS: "networkID",
         TableType.ALSO_KNOWN: "akiID",
         TableType.BIOBANKS: "ID",
+        TableType.STUDIES: "studyID",
         TableType.COLLECTIONS: "ID",
         TableType.FACTS: "factID",
     }
 
     def get_schema_id(self) -> str:
-        return f"{os.getenv('SCHEMA_PREFIX')}-{self.code}"
+        return f"{os.getenv('NN_SCHEMA_PREFIX')}-{self.code}"
 
     @staticmethod
     def get_staging_id(table_type: TableType) -> str:
@@ -285,14 +287,15 @@ class Source(Enum):
 
 @dataclass
 class DirectoryData(ABC):
-    """Abstract base class for containers storing rows from the six Directory tables:
-    persons, networks, also_known_in, biobanks, collections and facts."""
+    """Abstract base class for containers storing rows from the seven Directory tables:
+    persons, networks, also_known_in, biobanks, collections, facts and studies."""
 
     source: Source
     persons: Table
     also_known_in: Table
     networks: Table
     biobanks: Table
+    studies: Table
     collections: Table
     facts: Table
     table_by_type: Dict[TableType, Table] = field(init=False)
@@ -303,6 +306,7 @@ def __post_init__(self):
             TableType.NETWORKS: self.networks,
             TableType.ALSO_KNOWN: self.also_known_in,
             TableType.BIOBANKS: self.biobanks,
+            TableType.STUDIES: self.studies,
             TableType.COLLECTIONS: self.collections,
             TableType.FACTS: self.facts,
         }
@@ -314,14 +318,15 @@ def import_order(self) -> List[Table]:
             self.networks,
             self.also_known_in,
             self.biobanks,
+            self.studies,
             self.collections,
             self.facts,
         ]
 
 
 @dataclass
 class NodeData(DirectoryData):
-    """Container object storing the six tables of a single node."""
+    """Container object storing the tables of a single node."""
 
     node: Node
 
@@ -350,7 +355,7 @@ def convert_to_staging(self) -> "NodeData":
 
 
 class MixedData(DirectoryData):
-    """Container object storing the six tables with mixed origins, for example from
+    """Container object storing the tables with mixed origins, for example from
     the combined tables or from multiple staging areas."""
 
     @staticmethod
@@ -362,6 +367,7 @@ def merge(self, other_data: DirectoryData):
         self.networks.rows_by_id.update(other_data.networks.rows_by_id)
         self.also_known_in.rows_by_id.update(other_data.also_known_in.rows_by_id)
         self.biobanks.rows_by_id.update(other_data.biobanks.rows_by_id)
+        self.studies.rows_by_id.update(other_data.studies.rows_by_id)
         self.collections.rows_by_id.update(other_data.collections.rows_by_id)
         self.facts.rows_by_id.update(other_data.facts.rows_by_id)
 
@@ -379,6 +385,7 @@ def copy_empty(self) -> "MixedData":
             networks=Table.of_empty(TableType.NETWORKS, self.networks.meta),
             also_known_in=Table.of_empty(TableType.ALSO_KNOWN, self.also_known_in.meta),
             biobanks=Table.of_empty(TableType.BIOBANKS, self.biobanks.meta),
+            studies=Table.of_empty(TableType.STUDIES, self.studies.meta),
             collections=Table.of_empty(TableType.COLLECTIONS, self.collections.meta),
             facts=Table.of_empty(TableType.FACTS, self.facts.meta),
         )