Skip to content

Commit

Permalink
feat(directory): publish studies (#4511)
Browse files Browse the repository at this point in the history
* chore(directory): change env variable name

* chore(directory): include env example file

* feat(directory): publish studies

* fix(directory): studies before collections and facts in table order

* chore: create new release version of the publish script

---------

Co-authored-by: Hessel Haagsma <[email protected]>
  • Loading branch information
dtroelofsprins and hslh authored Dec 5, 2024
1 parent 9fd49ec commit da56d5c
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 36 deletions.
3 changes: 3 additions & 0 deletions tools/directory/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# Changelog

## Version 1.1.0
- Include publishing of the Studies table

## Version 1.0.0
- EMX2 version of the Python tooling for a BBMRI Biobank Directory
5 changes: 5 additions & 0 deletions tools/directory/dev/.env_example
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
TARGET="https://your-server"
USERNAME=****
PASSWORD=****
DIRECTORY="name of the BBMRI-ERIC schema"
NN_SCHEMA_PREFIX="National nodes are stored under two-letter (country) codes, which might have a schema prefix"
42 changes: 21 additions & 21 deletions tools/directory/setup.cfg
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
[bumpversion]
commit = False
tag = False
tag_name = {new_version}
current_version = 1.1.0
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
serialize =
{major}.{minor}.{patch}-{release}{build}
{major}.{minor}.{patch}

[metadata]
name = molgenis-emx2-directory-client
description = MOLGENIS EMX2 Python tooling for a BBMRI Biobank Directory
Expand All @@ -7,10 +17,10 @@ license = LGPL-3.0-only
long_description = file: README.md
long_description_content_type = text/markdown; charset=UTF-8; variant=GFM
url = https://github.com/molgenis/molgenis-emx2/tree/master/tools/directory/
project_urls =
project_urls =
Source = https://github.com/molgenis/molgenis-emx2/tree/master/tools/directory/
platforms = any
classifiers =
classifiers =
License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)
Programming Language :: Python
Programming Language :: Python :: 3 :: Only
Expand All @@ -20,10 +30,10 @@ classifiers =
zip_safe = False
packages = find_namespace:
include_package_data = True
package_dir =
package_dir =
=src
python_requires = >=3.10
install_requires =
install_requires =
dataclasses
molgenis-emx2-pyclient>=11.23.0
requests>=2.21.0
Expand All @@ -32,24 +42,24 @@ install_requires =

[options.packages.find]
where = src
exclude =
exclude =
tests

[options.extras_require]
testing =
testing =
setuptools
pytest
pytest-cov
pytest-asyncio
pytest-mock

[tool:pytest]
addopts =
addopts =
--cov molgenis_emx2.directory_client --cov-report term-missing
--verbose
--junitxml junit.xml
asyncio_default_fixture_loop_scope = "function"
norecursedirs =
norecursedirs =
dist
build
.tox
Expand All @@ -65,27 +75,17 @@ formats = bdist_wheel
[flake8]
max_line_length = 88
extend_ignore = E203, W503
exclude =
exclude =
.tox
build
dist
.eggs
docs/conf.py

[bumpversion]
commit = False
tag = False
tag_name = {new_version}
current_version = 1.0.0
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
serialize =
{major}.{minor}.{patch}-{release}{build}
{major}.{minor}.{patch}

[bumpversion:part:release]
optional_value = prod
first_value = dev
values =
values =
dev
prod

Expand All @@ -102,7 +102,7 @@ replace = __version__ = '{new_version}'
[pyscaffold]
version = 4.0.2
package = directory_client
extensions =
extensions =
markdown
namespace
pre_commit
Expand Down
5 changes: 1 addition & 4 deletions tools/directory/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@

if __name__ == "__main__":
try:
setup(
version='1.0.0',
use_scm_version={"version_scheme": "no-guess-dev"})
setup(version="1.1.0", use_scm_version={"version_scheme": "no-guess-dev"})
except: # noqa
print(
"\n\nAn error occurred while building the project, "
Expand All @@ -21,4 +19,3 @@
" pip install -U setuptools setuptools_scm wheel\n\n"
)
raise

Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@
finally:
del version, PackageNotFoundError

__version__ = '1.0.0'
__version__ = "1.1.0"
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def _init_state(self, nodes: List[Node], report: ErrorReport) -> PublishingState
biobanks=["id", "pid", "name", "national_node", "withdrawn"],
collections=["id", "national_node"],
facts=["id", "national_node"],
studies=["id", "national_node"],
),
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class AttributesRequest:
networks: List[str]
also_known_in: List[str]
biobanks: List[str]
studies: List[str]
collections: List[str]
facts: List[str]

Expand Down Expand Up @@ -319,7 +320,7 @@ def _reset_data_types(data: List, meta: MetaTable):

def get_staging_node_data(self, node: Node) -> NodeData:
"""
Gets the six tables that belong to a single node's staging area.
Gets the tables that belong to a single node's staging area.
:param Node node: the node to get the staging data for
:return: a NodeData object
Expand All @@ -341,7 +342,7 @@ def get_staging_node_data(self, node: Node) -> NodeData:

def get_published_node_data(self, node: Node) -> NodeData:
"""
Gets the six tables that belong to a single node from the published tables.
Gets the tables that belong to a single node from the published tables.
Filters the rows based on the national_node field.
:param Node node: the node to get the published data for
Expand All @@ -368,7 +369,7 @@ def get_published_data(
self, nodes: List[Node], attributes: AttributesRequest
) -> MixedData:
"""
Gets the six tables that belong to one or more nodes from the Directory tables.
Gets the tables that belong to one or more nodes from the Directory tables.
Filters the rows based on the national_node field.
:param List[Node] nodes: the node(s) to get the Directory data for
Expand Down Expand Up @@ -404,7 +405,7 @@ def get_published_data(

async def upload_data(self, schema: str, data: DirectoryData):
"""
Converts the six tables of a DirectoryData object to CSV, bundles them in
Converts the tables of a DirectoryData object to CSV, bundles them in
a ZIP archive and imports them through the import API.
:param schema: database where data should be uploaded into
:param data: a DirectoryData object
Expand Down
19 changes: 13 additions & 6 deletions tools/directory/src/molgenis_emx2/directory_client/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@


class TableType(Enum):
"""Enum representing the six tables each national node has."""
"""Enum representing the tables each national node has."""

PERSONS = "persons"
ALSO_KNOWN = "also_known_in"
NETWORKS = "networks"
BIOBANKS = "biobanks"
STUDIES = "studies"
COLLECTIONS = "collections"
FACTS = "facts"

Expand Down Expand Up @@ -207,12 +208,13 @@ class Node:
TableType.NETWORKS: "networkID",
TableType.ALSO_KNOWN: "akiID",
TableType.BIOBANKS: "ID",
TableType.STUDIES: "studyID",
TableType.COLLECTIONS: "ID",
TableType.FACTS: "factID",
}

def get_schema_id(self) -> str:
return f"{os.getenv('SCHEMA_PREFIX')}-{self.code}"
return f"{os.getenv('NN_SCHEMA_PREFIX')}-{self.code}"

@staticmethod
def get_staging_id(table_type: TableType) -> str:
Expand Down Expand Up @@ -285,14 +287,15 @@ class Source(Enum):

@dataclass
class DirectoryData(ABC):
"""Abstract base class for containers storing rows from the six Directory tables:
persons, networks, also_known_in, biobanks, collections and facts."""
"""Abstract base class for containers storing rows from the seven Directory tables:
persons, networks, also_known_in, biobanks, collections, facts and studies."""

source: Source
persons: Table
also_known_in: Table
networks: Table
biobanks: Table
studies: Table
collections: Table
facts: Table
table_by_type: Dict[TableType, Table] = field(init=False)
Expand All @@ -303,6 +306,7 @@ def __post_init__(self):
TableType.NETWORKS: self.networks,
TableType.ALSO_KNOWN: self.also_known_in,
TableType.BIOBANKS: self.biobanks,
TableType.STUDIES: self.studies,
TableType.COLLECTIONS: self.collections,
TableType.FACTS: self.facts,
}
Expand All @@ -314,14 +318,15 @@ def import_order(self) -> List[Table]:
self.networks,
self.also_known_in,
self.biobanks,
self.studies,
self.collections,
self.facts,
]


@dataclass
class NodeData(DirectoryData):
"""Container object storing the six tables of a single node."""
"""Container object storing the tables of a single node."""

node: Node

Expand Down Expand Up @@ -350,7 +355,7 @@ def convert_to_staging(self) -> "NodeData":


class MixedData(DirectoryData):
"""Container object storing the six tables with mixed origins, for example from
"""Container object storing the tables with mixed origins, for example from
the combined tables or from multiple staging areas."""

@staticmethod
Expand All @@ -362,6 +367,7 @@ def merge(self, other_data: DirectoryData):
self.networks.rows_by_id.update(other_data.networks.rows_by_id)
self.also_known_in.rows_by_id.update(other_data.also_known_in.rows_by_id)
self.biobanks.rows_by_id.update(other_data.biobanks.rows_by_id)
self.studies.rows_by_id.update(other_data.studies.rows_by_id)
self.collections.rows_by_id.update(other_data.collections.rows_by_id)
self.facts.rows_by_id.update(other_data.facts.rows_by_id)

Expand All @@ -379,6 +385,7 @@ def copy_empty(self) -> "MixedData":
networks=Table.of_empty(TableType.NETWORKS, self.networks.meta),
also_known_in=Table.of_empty(TableType.ALSO_KNOWN, self.also_known_in.meta),
biobanks=Table.of_empty(TableType.BIOBANKS, self.biobanks.meta),
studies=Table.of_empty(TableType.STUDIES, self.studies.meta),
collections=Table.of_empty(TableType.COLLECTIONS, self.collections.meta),
facts=Table.of_empty(TableType.FACTS, self.facts.meta),
)
Expand Down

0 comments on commit da56d5c

Please sign in to comment.