From de46493fc391ff660235c04af36218849b6dbf0d Mon Sep 17 00:00:00 2001
From: YaphetKG <45075777+YaphetKG@users.noreply.github.com>
Date: Tue, 31 Oct 2023 09:10:00 -0400
Subject: [PATCH] Release 0.10.4 (#89)

* Update _version.py (#86)
* Update _version.py
* Rti merge (#84)
* roger cli prepped for Merge Deploy
* Update Makefile to work with python env
* Update redisgraph-bulk-loader to fix issue with loading MODULE LIST
* Revert "Update redisgraph-bulk-loader to fix issue with loading MODULE LIST"
  This reverts commit 7baf7efa725caac77e5501e948f545a0f4b20e3d.
* Finalized dev deployment of dug inside Catapult Merge: deployment yamls, code changes and configurations
* updated to reflect the Dug-Api updates to FastAPI
* adding multi-label redis by removing 'biolink:' on nodes; edges cannot be fixed after the update, so they need to be solved either by changing TranQL AND Plater or by forking bulk-redisgraph to allow colons in the edges
* Working multi-label redis nodes w/ no biolink label
* Latest code changes to deploy working Roger in Merge
* biolink data moved to '.' separator
* updates to include new dug fixes, upgraded redis-bulk-loader, and made changes for biolink variables to specify their domain with a 'biolink.' prefix
* adding test roger code
* removed helm deployments
* change docker owner
* remove core.py
* remove dup dev config
* redis graph is not directly used, removing cruft
* remove print statement
* remove logging files
* update requirements
* update requirements
* add redis_graph.py
* fix import error for logger
* adding es scheme and ca_path config
* adding es scheme and ca_path config
* adding debug code
* removing debug
* adding nodes args
* adding biolink.
* adding biolink.
* Update requirements.txt
* Update .gitignore
* Update dug_utils.py
  Handle Error when curie not found in validate
* Update __init__.py
* Update config.yaml
* Update dev-config.yaml
* Update docker-compose.yaml
* fixed docker-compose
* adding back postgres volume to docker compose
* env correction, docker compose updates

---------

Co-authored-by: Nathan Braswell
Co-authored-by: esurface
Co-authored-by: braswent

---------

Co-authored-by: Nathan Braswell
Co-authored-by: esurface
Co-authored-by: braswent
---
 .env                                    |   8 +-
 .gitignore                              |   4 +-
 Makefile                                |   5 +-
 README.md                               |   2 +-
 bin/docker_backend/docker-compose.yaml  |   4 +-
 bin/roger                               |   2 +-
 cli.py                                  |   9 +-
 dags/_version.py                        |   3 +-
 dags/dug_helpers/dug_utils.py           |  31 ++++--
 dags/roger/config/__init__.py           |   4 +
 dags/roger/config/config.yaml           |   4 +-
 dags/roger/{ => config}/dev-config.yaml |   4 +-
 dags/roger/core/bulkload.py             |  24 +++--
 dags/roger/core/redis_graph.py          |  55 +++++------
 dags/test_metadata.yaml                 | 124 ++++++++++++++++++++++++
 docker-compose.yaml                     |  62 ++++++------
 requirements.txt                        |   8 +-
 roger-cli-steps.md                      |  27 ++++++
 tests/test_redis_query.cypher           |   5 +
 19 files changed, 285 insertions(+), 100 deletions(-)
 rename dags/roger/{ => config}/dev-config.yaml (98%)
 create mode 100644 dags/test_metadata.yaml
 create mode 100644 roger-cli-steps.md
 create mode 100644 tests/test_redis_query.cypher

diff --git a/.env b/.env
index 7bc31d98..9e2ba0e3 100644
--- a/.env
+++ b/.env
@@ -15,9 +15,9 @@ ELASTIC_USERNAME=elastic
 
 NBOOST_API_HOST=nboost
 
-REDIS_PASSWORD=12345
-REDIS_HOST=redis
+REDIS_PASSWORD=weak
+REDIS_HOST=merge-redis-master
 REDIS_PORT=6379
-
 TRANQL_ACCESS_LOG=access.log
-TRANQL_ERROR_LOG=error.log
\ No newline at end of file
+TRANQL_ERROR_LOG=error.log
+ROGER_DUG__INPUTS_DATA__SETS=topmed:v1.0
\ No newline at end of file

diff --git a/.gitignore b/.gitignore
index 065d4e8b..8ca8ea91 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,5 @@
 # Git ignore bioler plate from https://github.com/github/gitignore/blob/master/Python.gitignore
-
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -108,6 +107,7 @@ celerybeat.pid
 *.sage.py
 
 # Environments
+.secrets-env
 .venv
 env/
 venv/
@@ -149,4 +149,4 @@ cython_debug/
 dags/roger/data
 local_storage
 logs
-tests/integration/data/bulk/
\ No newline at end of file
+tests/integration/data/bulk/

diff --git a/Makefile b/Makefile
index 644a6d01..ef227aa4 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,5 @@
-PYTHON = PYTHONPATH=dags /usr/bin/env python3
+PYTHON = $(shell which python3)
+PYTHONPATH = dags
 VERSION_FILE = ./dags/_version.py
 VERSION = $(shell cut -d " " -f 3 ${VERSION_FILE})
 DOCKER_REPO = docker.io
@@ -17,10 +18,12 @@ help:
 mk_dirs:
 	mkdir -p {logs,plugins}
 	mkdir -p local_storage/elastic
+	mkdir -p local_storage/redis
 
 rm_dirs:
 	rm -rf logs/*
 	rm -rf local_storage/elastic/*
+	rm -rf local_storage/redis/*
 	rm -rf ./dags/roger/data/*
 
 #install: Install application along with required packages to local environment

diff --git a/README.md b/README.md
index 5ee4a69b..92ed1652 100644
--- a/README.md
+++ b/README.md
@@ -533,7 +533,7 @@ Open localhost:8080 in a browser.
 Then run:
 
 ```
-python tranql_translator.py
+python tranql_translate.py
 ```
 The Airflow interface shows the workflow:
 ![image](https://user-images.githubusercontent.com/306971/97787955-b968f680-1b8b-11eb-86cc-4d93842eafd3.png)

diff --git a/bin/docker_backend/docker-compose.yaml b/bin/docker_backend/docker-compose.yaml
index 695363b0..d87c6ae6 100644
--- a/bin/docker_backend/docker-compose.yaml
+++ b/bin/docker_backend/docker-compose.yaml
@@ -22,9 +22,9 @@ services:
       - roger-network
     environment:
       - REDIS_PASSWORD=$ROGERENV_REDISGRAPH_PASSWORD
-    entrypoint: /usr/local/bin/gunicorn --workers=2 --bind=0.0.0.0:8081 --name=tranql --timeout=600 tranql.api:app
+    entrypoint: /usr/local/bin/gunicorn --workers=2 --bind=0.0.0.0:8001 --name=tranql --timeout=600 tranql.api:app
     ports:
-      - 8081:8081
+      - 8001:8001
     volumes:
       - ./tranql-schema.yaml:/tranql/tranql/conf/schema.yaml
 #################################################################################

diff --git a/bin/roger b/bin/roger
index 55fd4697..4626df88 100755
--- a/bin/roger
+++ b/bin/roger
@@ -10,7 +10,7 @@ export PYTHONPATH=$ROGER_HOME:$ROGER_HOME/../kgx
 export DB_NAME=test
 
 roger () {
-    python $ROGER_HOME/roger/core.py $*
+    python $ROGER_HOME/dags/roger/core.py $*
 }
 
 kgx () {

diff --git a/cli.py b/cli.py
index 7b6b609f..be77525a 100644
--- a/cli.py
+++ b/cli.py
@@ -4,12 +4,15 @@
 from dug_helpers.dug_utils import DugUtil, get_topmed_files, get_dbgap_files, get_sparc_files, get_anvil_files, get_nida_files
 import sys
 import argparse
+import os
+import time
 
 log = get_logger()
 
 if __name__ == "__main__":
-
+    start = time.time()
+    log.info(f"Start TIME:{start}")
     parser = argparse.ArgumentParser(description='Roger common cli tool.')
     """ Common CLI.
""" parser.add_argument('-d', '--data-root', help="Root of data hierarchy", default=None) @@ -102,4 +105,8 @@ if args.validate_concepts: DugUtil.validate_indexed_concepts(config=config) + end = time.time() + time_elapsed = end - start + log.info(f"Completion TIME:{time_elapsed}") + sys.exit (0) diff --git a/dags/_version.py b/dags/_version.py index 40bcdb68..adcf54c7 100644 --- a/dags/_version.py +++ b/dags/_version.py @@ -1 +1,2 @@ -version = "0.10.3" +version = "0.10.4" + diff --git a/dags/dug_helpers/dug_utils.py b/dags/dug_helpers/dug_utils.py index 85b40d8f..e55fdb34 100644 --- a/dags/dug_helpers/dug_utils.py +++ b/dags/dug_helpers/dug_utils.py @@ -381,10 +381,17 @@ def _search_elements(self, curie, search_term): query=search_term )) ids_dict = [] - for element_type in response: - all_elements_ids = [e['id'] for e in - reduce(lambda x, y: x + y['elements'], response[element_type], [])] - ids_dict += all_elements_ids + if 'total_items' in response: + if response['total_items'] == 0: + log.error(f"No search elements returned for variable search: {self.variables_index}.") + log.error(f"Concept id : {curie}, Search term: {search_term}") + raise Exception(f"Validation error - Did not find {curie} for" + f"Search term: {search_term}") + else: + for element_type in response: + all_elements_ids = [e['id'] for e in + reduce(lambda x, y: x + y['elements'], response[element_type], [])] + ids_dict += all_elements_ids return ids_dict def crawl_concepts(self, concepts, data_set_name): @@ -511,18 +518,24 @@ def validate_indexed_concepts(self, elements, concepts): searched_element_ids = self._search_elements(curie, search_term) - present = bool(len([x for x in sample_elements[curie] if x in searched_element_ids])) - if not present: - log.error(f"Did not find expected variable {element.id} in search result.") + if curie not in sample_elements: + log.error(f"Did not find Curie id {curie} in Elements.") log.error(f"Concept id : {concept.id}, Search term: {search_term}") raise Exception(f"Validation error - Did not find {element.id} for" f" Concept id : {concept.id}, Search term: {search_term}") + else: + present = bool(len([x for x in sample_elements[curie] if x in searched_element_ids])) + if not present: + log.error(f"Did not find expected variable {element.id} in search result.") + log.error(f"Concept id : {concept.id}, Search term: {search_term}") + raise Exception(f"Validation error - Did not find {element.id} for" + f" Concept id : {concept.id}, Search term: {search_term}") def clear_index(self, index_id): - exists = self.search_obj.es.indices.exists(index_id) + exists = self.search_obj.es.indices.exists(index=index_id) if exists: log.info(f"Deleting index {index_id}") - response = self.event_loop.run_until_complete(self.search_obj.es.indices.delete(index_id)) + response = self.event_loop.run_until_complete(self.search_obj.es.indices.delete(index=index_id)) log.info(f"Cleared Elastic : {response}") log.info("Re-initializing the indicies") self.index_obj.init_indices() diff --git a/dags/roger/config/__init__.py b/dags/roger/config/__init__.py index 6a6d757b..403b25b1 100644 --- a/dags/roger/config/__init__.py +++ b/dags/roger/config/__init__.py @@ -144,6 +144,8 @@ class ElasticsearchConfig(DictLike): username: str = "elastic" password: str = "" nboost_host: str = "" + scheme: str = "http" + ca_path: str = "" @@ -174,6 +176,8 @@ def to_dug_conf(self) -> DugConfig: elastic_host=self.elasticsearch.host, elastic_password=self.elasticsearch.password, elastic_username=self.elasticsearch.username, + 
+            elastic_scheme=self.elasticsearch.scheme,
+            elastic_ca_path=self.elasticsearch.ca_path,
             redis_host=self.redisgraph.host,
             redis_password=self.redisgraph.password,
             redis_port=self.redisgraph.port,

diff --git a/dags/roger/config/config.yaml b/dags/roger/config/config.yaml
index 92503030..ae370235 100644
--- a/dags/roger/config/config.yaml
+++ b/dags/roger/config/config.yaml
@@ -92,6 +92,8 @@ elasticsearch:
   username: elastic
   password: ""
   nboost_host: ""
+  scheme: "http"
+  ca_path: ""
 
 validation:
   queries:
@@ -154,4 +156,4 @@ lakefs_config:
   enabled: false
   access_key_id: ""
   secret_access_key: ""
-  host: ""
\ No newline at end of file
+  host: ""

diff --git a/dags/roger/dev-config.yaml b/dags/roger/config/dev-config.yaml
similarity index 98%
rename from dags/roger/dev-config.yaml
rename to dags/roger/config/dev-config.yaml
index 67fd6d5e..91fc0af3 100644
--- a/dags/roger/dev-config.yaml
+++ b/dags/roger/config/dev-config.yaml
@@ -13,7 +13,7 @@ data_root: "/Users/schreepc/Projects/helxplatform/roger/roger/test/data"
 dug_data_root: dug_helpers/dug_data/topmed_data
 base_data_uri: https://stars.renci.org/var/kgx_data/trapi-1.0/
 kgx:
-  biolink_model_version: 1.5.0
+  biolink_model_version: test
 
 #https://github.com/RedisGraph/redisgraph-bulk-loader/blob/master/redisgraph_bulk_loader/bulk_insert.py#L43
 bulk_loader:
@@ -115,4 +115,4 @@ validation:
   - var: "[N-]=[N+]=[N-]"
   - var: "[Ag+]"
   - var: "[Zn+2]"
-  - var: "[C-]#[O+]"
\ No newline at end of file
+  - var: "[C-]#[O+]"

diff --git a/dags/roger/core/bulkload.py b/dags/roger/core/bulkload.py
index 772b7cf9..1eca92db 100644
--- a/dags/roger/core/bulkload.py
+++ b/dags/roger/core/bulkload.py
@@ -341,20 +341,26 @@ def insert (self):
         args = []
         if len(nodes) > 0:
             bulk_path_root = storage.bulk_path('nodes') + os.path.sep
-            nodes_with_type = [
-                f"{ x.replace(bulk_path_root, '').split('.')[0].replace('~', ':')} {x}"
-                for x in nodes]
+            nodes_with_type = []
+            for x in nodes:
+                """
+                These lines prep the nodes bulk load by:
+                1) appending 'biolink.' to labels
+                2) combining labels to create a multi-label redis node, i.e. "biolink.OrganismalEntity:biolink.SubjectOfInvestigation"
+                """
+                file_name_type_part = x.replace(bulk_path_root, '').split('.')[0].split('~')[1]
+                all_labels = "biolink." + file_name_type_part + ":" + ":".join([f'biolink.{v.lstrip("biolink:")}' for v in self.biolink.toolkit.get_ancestors("biolink:" + file_name_type_part, reflexive=False, formatted=True)])
+                nodes_with_type.append(f"{all_labels} {x}")
             args.extend(("-N " + " -N ".join(nodes_with_type)).split())
         if len(edges) > 0:
             bulk_path_root = storage.bulk_path('edges') + os.path.sep
-            edges_with_type = [
-                f"{x.replace(bulk_path_root, '').strip(os.path.sep).split('.')[0].replace('~', ':')} {x}"
-                for x in edges]
+            edges_with_type = [f"biolink.{x.replace(bulk_path_root, '').strip(os.path.sep).split('.')[0].split('~')[1]} {x}"
+                               for x in edges]
+            # Edge label now no longer has 'biolink:'
             args.extend(("-R " + " -R ".join(edges_with_type)).split())
         args.extend([f"--separator={self.separator}"])
-        args.extend([f"--host={redisgraph['host']}"])
-        args.extend([f"--port={redisgraph['port']}"])
-        args.extend([f"--password={redisgraph['password']}"])
+        log.debug(f"--redis-url=redis://:{redisgraph['password']}@{redisgraph['host']}:{redisgraph['port']}")
+        args.extend([f"--redis-url=redis://:{redisgraph['password']}@{redisgraph['host']}:{redisgraph['port']}"])
         args.extend(['--enforce-schema'])
         args.extend([f"{redisgraph['graph']}"])
         """ standalone_mode=False tells click not to sys.exit() """

diff --git a/dags/roger/core/redis_graph.py b/dags/roger/core/redis_graph.py
index 6710d3fc..ca65ddce 100644
--- a/dags/roger/core/redis_graph.py
+++ b/dags/roger/core/redis_graph.py
@@ -1,51 +1,44 @@
-"Graph abstraction layer over redisgraph python module"
-
 import copy
 import redis
-from redisgraph import Node, Edge, Graph
+# from redisgraph import Node, Edge, Graph
+# https://redis-py.readthedocs.io/en/v4.5.1/redismodules.html#redisgraph-commands
+from redis.commands.graph.node import Node
+from redis.commands.graph.edge import Edge
+
 from roger.logger import get_logger
 
 logger = get_logger ()
 
-
 class RedisGraph:
-    """ Graph abstraction over RedisGraph
-
-    A thin wrapper but provides us some options.
-    """
-
-    def __init__(self, host='localhost', port=6379, graph='default',
-                 password=''):
+    """ Graph abstraction over RedisGraph. A thin wrapper but provides us some options. """
+
+    def __init__(self, host='localhost', port=6379, graph='default', password=''):
         """ Construct a connection to Redis Graph. """
         self.r = redis.Redis(host=host, port=port, password=password)
-        self.redis_graph = Graph(graph, self.r)
+        self.redis_graph = self.r.graph(graph)
 
     def add_node (self, identifier=None, label=None, properties=None):
         """ Add a node with the given label and properties. """
-        logger.debug (
-            f"--adding node id:{identifier} label:{label} prop:{properties}")
+        logger.debug (f"--adding node id:{identifier} label:{label} prop:{properties}")
         if identifier and properties:
             properties['id'] = identifier
-        node = Node(node_id=identifier, alias=identifier,
-                    label=label, properties=properties)
+        node = Node(node_id=identifier, alias=identifier, label=label, properties=properties)
         self.redis_graph.add_node(node)
         return node
 
     def get_edge (self, start, end, predicate=None):
-        "Get an edge from the graph with the specified start and end ids"
+        """ Get an edge from the graph with the specified start and end identifiers. """
""" result = None for edge in self.redis_graph.edges: if edge.src_node.id == start and edge.dest_node.id == end: result = edge break return result - + def add_edge (self, start, predicate, end, properties={}): - "Add edge with given predicate and properties btw start and end nodes" - logger.debug ( - f"--adding edge start:{start} pred:{predicate} " - f"end:{end} prop:{properties}") + """ Add an edge with the given predicate and properties between start and end nodes. """ + logger.debug (f"--adding edge start:{start} pred:{predicate} end:{end} prop:{properties}") if isinstance(start, str) and isinstance(end, str): start = Node(node_id = start, label='thing') end = Node(node_id = end, label='thing') @@ -56,13 +49,11 @@ def add_edge (self, start, predicate, end, properties={}): return edge def has_node (self, identifier): - "Does the graph have a node with this ID" return identifier in self.redis_graph.nodes def get_node (self, identifier, properties=None): - "Retrieve the node with the given id" return self.redis_graph.nodes[identifier] - + def commit (self): """ Commit modifications to the graph. """ self.redis_graph.commit() @@ -70,13 +61,13 @@ def commit (self): def query (self, query): """ Query and return result set. """ result = self.redis_graph.query(query) - result.pretty_print() + print(result) return result - + def delete (self): """ Delete the named graph. """ self.redis_graph.delete() - + def test (): rg = RedisGraph () p = { 'a' : 4, @@ -91,10 +82,10 @@ def test (): if last is not None: rg.add_edge (node, 'link', last) last = node - rg.commit () - rg.query ("MATCH (obj:yeah)-[:link]->(j:yeah) RETURN obj.a, obj.b, obj.x") - rg.query ("MATCH (a) RETURN a") + rg.commit () + rg.query ("""MATCH (obj:yeah)-[:link]->(j:yeah) RETURN obj.a, obj.b, obj.x""") + rg.query ("""MATCH (a) RETURN a""") rg.delete () # rg.query ("""MATCH (a { id : 'chemical_substance' }) RETURN a""") -#test () +#test () \ No newline at end of file diff --git a/dags/test_metadata.yaml b/dags/test_metadata.yaml new file mode 100644 index 00000000..54d508c4 --- /dev/null +++ b/dags/test_metadata.yaml @@ -0,0 +1,124 @@ +# This is a file that lists the data to be used for testing purposes +# It contains a reduced set of the metadata.yaml file +kgx: + versions: + - files: + - biolink-v1.0.json + - ctd-v1.0.json + - gtopdb-v1.0.json + - hetio-v1.0.json + - hgnc-v1.0.json + - hmdb-v1.0.json + - kegg-v1.0.json + - mychem-v1.0.json + - ontological-hierarchy-v1.0.json + - panther-v1.0.json + - foodb-v1.0.json + - pharos-v1.0.json + - intact-v1.0.json + - human-goa-v1.0.json + - uberongraph-v1.0.json + - viral-proteome-v1.0.json + version: v1.0 + name: baseline-graph + format: json + - files: + - biolink-v2.0.json + - ctd-v2.0.json + - gtopdb-v2.0.json + - hetio-v2.0.json + - hgnc-v2.0.json + - hmdb-v2.0.json + - kegg-v2.0.json + - mychem-v2.0.json + - ontological-hierarchy-v2.0.json + - panther-v2.0.json + - foodb-v2.0.json + - pharos-v2.0.json + - intact-v2.0.json + - human-goa-v2.0.json + - uberongraph-v2.0.json + - viral-proteome-v2.0.json + version: v2.0 + name: baseline-graph + format: json + - files: + - heal/sparc/curation-export-processed.json + version: v2.0 + name: sparc-kgx + format: json + - files: + - Biolink_edges_v3.0.jsonl + - Biolink_nodes_v3.0.jsonl + - CTD_edges_v3.0.jsonl + - CTD_nodes_v3.0.jsonl + - DrugCentral_edges_v3.0.jsonl + - DrugCentral_nodes_v3.0.jsonl + - GtoPdb_edges_v3.0.jsonl + - GtoPdb_nodes_v3.0.jsonl + - Hetio_edges_v3.0.jsonl + - Hetio_nodes_v3.0.jsonl + - HGNC_edges_v3.0.jsonl + - 
+        - HGNC_nodes_v3.0.jsonl
+        - HMDB_edges_v3.0.jsonl
+        - HMDB_nodes_v3.0.jsonl
+        - HumanGOA_edges_v3.0.jsonl
+        - HumanGOA_nodes_v3.0.jsonl
+        - IntAct_edges_v3.0.jsonl
+        - IntAct_nodes_v3.0.jsonl
+        - OntologicalHierarchy_edges_v3.0.jsonl
+        - OntologicalHierarchy_nodes_v3.0.jsonl
+        - PANTHER_edges_v3.0.jsonl
+        - PANTHER_nodes_v3.0.jsonl
+        - PHAROS_edges_v3.0.jsonl
+        - PHAROS_nodes_v3.0.jsonl
+        - UberGraph_edges_v3.0.jsonl
+        - UberGraph_nodes_v3.0.jsonl
+      version: v3.0
+      name: baseline-graph
+      format: jsonl
+    - version: test
+      files:
+        - hgnc_nodes.jsonl
+        - hgnc_edges.jsonl
+      name: test
+    - version: v3.0
+      name: cde-graph
+      format: jsonl
+      files:
+        - cde/annotated_edges_v3.0.jsonl
+        - cde/annotated_nodes_v3.0.jsonl
+dug_inputs:
+  versions:
+    - name: bdc
+      version: v1.0
+      files:
+        s3:
+          - "bdc/v1.0/bdc_dbgap_data_dicts.tar.gz"
+        stars:
+          - "bdc_dbgap_data_dicts.tar.gz"
+      format: dbGaP
+    - name: nida
+      version: v1.0
+      files:
+        s3:
+          - "nida/v1.0/nida-12studies.tar.gz"
+        stars:
+          - "nida-12studies.tar.gz"
+      format: nida
+    - name: sparc
+      version: v1.0
+      files:
+        s3:
+          - "sparc/v1.0/sparc-dbgap-xml-formatted.tar.gz"
+        stars:
+          - "sparc-dbgap-xml-formatted.tar.gz"
+      format: sparc
+    - name: anvil
+      version: v1.0
+      files:
+        s3:
+          - "bdc/v1.0/anvil_dbgap_data_dicts.tar.gz"
+        stars:
+          - "anvil_dbgap_data_dicts.tar.gz"
+      format: anvil
\ No newline at end of file

diff --git a/docker-compose.yaml b/docker-compose.yaml
index 15e4b88f..7c698ed6 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -52,18 +52,20 @@ x-airflow-common:
     AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: 'true'
     AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
-    ROGER_DUG__INPUTS_DATA__SETS: topmed,bdc-dbGaP
+    ROGER_DUG__INPUTS_DATA__SETS: "$ROGER_DUG__INPUTS_DATA__SETS"
     ROGER_ELASTICSEARCH_HOST: "$ELASTIC_API_HOST"
     ROGER_ELASTICSEARCH_PASSWORD: "$ELASTIC_PASSWORD"
     ROGER_ELASTICSEARCH_NBOOST__HOST: "$NBOOST_API_HOST"
     ROGER_REDISGRAPH_HOST: "$REDIS_HOST"
     ROGER_REDISGRAPH_PASSWORD: "$REDIS_PASSWORD"
     ROGER_KGX_DATASET__VERSION: "v3.0"
+    ROGER_DATA_DIR: "/opt/airflow/share/data"
   volumes:
     - ./dags:/opt/airflow/dags
     - ./logs:/opt/airflow/logs
     - ./plugins:/opt/airflow/plugins
-  user: "${AIRFLOW_UID:-50000}:${AIRFLOW_GID:-50000}"
+    - ./data:/opt/airflow/share/data
+  user: root
  depends_on:
     redis:
       condition: service_healthy
@@ -79,24 +81,14 @@ services:
       POSTGRES_DB: airflow
     volumes:
       - postgres-db-volume:/var/lib/postgresql/data
+      - ${DATA_DIR}/elastic:/elastic
+      - ${DATA_DIR}/redis:/redis
     healthcheck:
       test: ["CMD", "pg_isready", "-U", "airflow"]
       interval: 5s
       retries: 5
     restart: always
 
-  redis:
-    image: redislabs/redisgraph:2.4.1
-    command: "redis-server --requirepass $REDIS_PASSWORD --loadmodule /usr/lib/redis/modules/redisgraph.so"
-    ports:
-      - $REDIS_PORT:$REDIS_PORT
-    healthcheck:
-      test: ["CMD", "redis-cli", "ping"]
-      interval: 5s
-      timeout: 30s
-      retries: 50
-    restart: always
-
   airflow-webserver:
     <<: *airflow-common
     command: webserver
@@ -141,17 +133,33 @@
       retries: 5
     restart: always
 
+  redis:
+    # image: redislabs/redisgraph:2.10.9 # Alternative image
+    user: root
+    image: 'redis/redis-stack:6.2.4-v2'
+    command: "redis-server --requirepass $REDIS_PASSWORD --loadmodule /opt/redis-stack/lib/redisgraph.so"
+    environment:
+      - REDIS_ARGS=--requirepass $REDIS_PASSWORD
+    volumes:
+      - $DATA_DIR/redis:/data # FIX RDB Error on local
+    ports:
+      - $REDIS_PORT:$REDIS_PORT
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 5s
+      timeout: 30s
+      retries: 50
+    restart: always
+
   dug:
-    image: cschreep/dug:develop
+    image: containers.renci.org/helxplatform/dug:latest
     depends_on:
       - elasticsearch
       - redis
-      - nboost
     restart: always
     environment:
       ELASTIC_API_HOST: "$ELASTIC_API_HOST"
       ELASTIC_PASSWORD: "$ELASTIC_PASSWORD"
-      NBOOST_API_HOST: "$NBOOST_API_HOST"
       REDIS_HOST: "$REDIS_HOST"
       REDIS_PASSWORD: "$REDIS_PASSWORD"
       FLASK_ENV: "development"
@@ -159,35 +167,32 @@
     entrypoint: [
       "gunicorn", "--workers=$API_WORKERS", "--name=dug",
       "--bind=0.0.0.0:$API_PORT", "--timeout=$API_TIMEOUT",
-      "--log-level=DEBUG", "--enable-stdio-inheritance", "--reload", "dug.api:app" ]
+      "--log-level=DEBUG", "-k", "uvicorn.workers.UvicornWorker", "--reload", "dug.server:APP"]
     ports:
       - $API_PORT:$API_PORT
 
   elasticsearch:
-    image: docker.elastic.co/elasticsearch/elasticsearch:7.6.1
+    user: root
+    image: docker.elastic.co/elasticsearch/elasticsearch:8.5.2
     environment:
       - ELASTIC_PASSWORD=$ELASTIC_PASSWORD
       - discovery.type=single-node
       - xpack.security.enabled=true
+      - ingest.geoip.downloader.enabled=false
     volumes:
       - $DATA_DIR/elastic:/usr/share/elasticsearch/data
     ports:
       - '9200:9200'
       - '9300:9300'
 
-  nboost:
-    image: koursaros/nboost:0.3.9-pt
-    ports:
-      - '8000:8000'
-
   tranql:
-    image: helxplatform/tranql-app:develop-0.0.56
+    image: containers.renci.org/helxplatform/tranql:rti-merge
     ports:
-      - '8081:8081'
+      - '8001:8001'
     entrypoint: [
       "gunicorn",
       "--workers=4",
-      "--bind=0.0.0.0:8081",
+      "--bind=0.0.0.0:8001",
       "--timeout=300",
       "--access-logfile=$TRANQL_ACCESS_LOG",
       "--error-logfile=$TRANQL_ERROR_LOG",
@@ -198,6 +203,5 @@ services:
       - REDIS_PASSWORD=$REDIS_PASSWORD
     volumes:
       - ./tranql-schema.yaml:/tranql/tranql/conf/schema.yaml
-
 volumes:
-  postgres-db-volume:
+  postgres-db-volume:
\ No newline at end of file

diff --git a/requirements.txt b/requirements.txt
index c964fce7..54918df6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,14 +1,12 @@
-apache-airflow==2.5.0
 boto3==1.18.23
 botocore==1.21.23
 #black==21.10b0
+elasticsearch==8.5.2
 flatten-dict
-redisgraph==2.4.1
-redisgraph-bulk-loader==0.9.5
+redisgraph-bulk-loader==0.12.3
 pytest
 PyYAML
-git+https://github.com/helxplatform/dug@2.11.2
-elasticsearch==7.11.0
+git+https://github.com/helxplatform/dug@2.12.0
 orjson
 kg-utils==0.0.6
 bmt==1.1.0

diff --git a/roger-cli-steps.md b/roger-cli-steps.md
new file mode 100644
index 00000000..8e132746
--- /dev/null
+++ b/roger-cli-steps.md
@@ -0,0 +1,27 @@
+# Deployment with Roger CLI
+
+## Quick Local Set Up
+
+This lists the steps to produce a local deployment of Roger. This set up does NOT use Airflow; instead it only uses the Roger CLI via **Makefile** commands.
+
+### Prerequisite Steps
+
+- Set up Roger dependencies by ensuring that the `.env` file has all the correct information.
+- Run the following docker compose commands:
+  - `docker compose up tranql -d`: starts up TranQL, which is the API handler for redis graph in the `graph` stage
+  - `docker compose up redis -d`: starts up redis, which will be used via redis graph for the `graph` stage
+  - `docker compose up dug -d`: starts up the dug API to work as the API handler for elasticsearch in the `index` stage
+  - `docker compose up elasticsearch -d`: starts up elasticsearch for the `index` stage
+
+### Roger CLI Steps
+
+1) `python3 -m venv ~/.environments/roger`
+2) `source ~/.environments/roger/bin/activate`
+3) `pip install -r requirements.txt`
+4) `export PYTHONPATH=$PWD/dags`
+5) Change the elasticsearch and redisgraph `host` values to localhost in `dags/roger/config/config.yaml`
+6) Get the S3 bucket credentials (access_key, bucket, host, secret_key) and export them as environment variables prefixed with ROGER_S3_, like: `ROGER_S3_ACCESS__KEY=XXXXKEYXXXX` (see the sketch after this patch)
+7) `cd bin/` and here either run `make all` OR separate the commands into three steps:
+   1) `make annotate`: executes the CLI-related commands found in `bin/dug_annotate/Makefile`
+   2) `make graph`: executes the CLI-related commands found in `bin/roger_graph_build/Makefile`
+   3) `make index`: executes the CLI-related commands found in `bin/dug_index/Makefile`

diff --git a/tests/test_redis_query.cypher b/tests/test_redis_query.cypher
new file mode 100644
index 00000000..509df1fa
--- /dev/null
+++ b/tests/test_redis_query.cypher
@@ -0,0 +1,5 @@
+MATCH (c {id:'HP:0032316'}) RETURN c
+
+MATCH (disease:`Disease` {`id`: 'MONDO:0004979'}) WITH disease MATCH (disease)-[e1_disease_phenotypic_feature]-(phenotypic_feature:`PhenotypicFeature` {})
+WITH disease AS disease, phenotypic_feature AS phenotypic_feature, collect(e1_disease_phenotypic_feature) AS e1_disease_phenotypic_feature
+RETURN disease,phenotypic_feature,e1_disease_phenotypic_feature,labels(disease) AS type__disease,labels(phenotypic_feature) AS type__phenotypic_feature,[edge in e1_disease_phenotypic_feature | type(edge)] AS type__e1_disease_phenotypic_feature,[edge in e1_disease_phenotypic_feature | [startNode(edge).id, endNode(edge).id]] AS id_pairs__e1_disease_phenotypic_feature
\ No newline at end of file
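
Note on the dags/roger/core/bulkload.py hunk: with redisgraph-bulk-loader 0.12.3 the connection details move from separate --host/--port/--password flags to a single --redis-url, and each -N/-R option takes a label (for nodes, a colon-joined multi-label string) followed by a CSV path. A minimal sketch of an equivalent manual invocation using the .env values above; the graph name "test", the separator "|", and the bulk file names are assumptions for illustration only:

# Hypothetical bulk files and graph name; Roger builds these args in insert().
redisgraph-bulk-insert test \
  -N biolink.ChemicalEntity:biolink.NamedThing data/bulk/nodes/nodes~ChemicalEntity.csv \
  -R biolink.related_to data/bulk/edges/edges~related_to.csv \
  --separator "|" \
  --enforce-schema \
  --redis-url redis://:weak@merge-redis-master:6379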
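For step 6 of roger-cli-steps.md, a sketch of the exports. Only ROGER_S3_ACCESS__KEY appears verbatim in the steps; the other variable names are assumptions that follow the same double-underscore convention seen in ROGER_DUG__INPUTS_DATA__SETS, and all values are placeholders:

export ROGER_S3_ACCESS__KEY=XXXXKEYXXXX       # shown in the steps above
export ROGER_S3_SECRET__KEY=XXXXSECRETXXXX    # assumed name
export ROGER_S3_BUCKET=my-roger-bucket        # assumed name and value
export ROGER_S3_HOST=https://s3.example.org   # assumed name and value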
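The queries in tests/test_redis_query.cypher can be tried by hand against the compose redis service with the RedisGraph GRAPH.QUERY command. A sketch, assuming the graph was bulk-loaded under the hypothetical name "test" (substitute whatever redisgraph.graph is set to in config.yaml) and the REDIS_PASSWORD from .env:

docker compose exec redis redis-cli -a weak GRAPH.QUERY test "MATCH (c {id:'HP:0032316'}) RETURN c"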