Merge pull request #142 from helxplatform/release/2.1

Release 2.1.0
helxplatform · May 17, 2021 · b1b7204 · b1b7204
2 parents 929bff1 + 839dc3c
commit b1b7204
Showing 40 changed files with 1,379 additions and 1,121 deletions.
diff --git a/.env b/.env
@@ -1,12 +1,5 @@
 DATA_DIR=./local_storage
 
-NEO4J_PASSWORD=15707
-NEO4J_HOST=neo4j
-NEO4J_CPU_LIMIT=2
-NEO4J_CPU_RESERVATION=1
-NEO4J_MEM_LIMIT=3G
-NEO4J_MEM_RESERVATION=2G
-
 ELASTIC_PASSWORD=15707
 ELASTIC_API_HOST=elasticsearch
 ELASTIC_USERNAME=elastic

diff --git a/.env.template b/.env.template
@@ -1,12 +1,5 @@
 DATA_DIR=$DATA_DIR
 
-NEO4J_PASSWORD=$RANDOM
-NEO4J_HOST=neo4j
-NEO4J_CPU_LIMIT=2
-NEO4J_CPU_RESERVATION=1
-NEO4J_MEM_LIMIT=3G
-NEO4J_MEM_RESERVATION=2G
-
 ELASTIC_PASSWORD=$RANDOM
 ELASTIC_API_HOST=elasticsearch
 ELASTIC_USERNAME=elastic

diff --git a/.gitignore b/.gitignore
@@ -156,5 +156,4 @@ variable_file.json
 monarch_results.txt
 anno_fails.txt
 data/elastic/
-data/neo4j/
 crawl/
diff --git a/Jenkinsfile b/Jenkinsfile
@@ -17,12 +17,12 @@ pipeline {
         }
         stage('Publish') {
             when {
-                branch 'develop'
+                tag "release-*"
             }
             steps {
                 sh '''
-                make build.image
-                make publish.image
+                make build
+                make publish
                 '''
             }
         }

diff --git a/README.md b/README.md
@@ -41,7 +41,6 @@ services from outside the container (but in a shell env), run:
 ```shell
 source .env
 export $(cut -d= -f1 .env)
-export NEO4J_HOST=localhost
 export ELASTIC_API_HOST=localhost
 export REDIS_HOST=localhost
 ```

diff --git a/data/heal_data_dicts.tar.gz b/data/heal_data_dicts.tar.gz
diff --git a/docker-compose.yaml b/docker-compose.yaml
@@ -11,7 +11,7 @@ version: '3.0'
 ## NOTE: To connect to a dug service running in docker machine, from your local
 ##       development machine, you will need to follow the steps in the Quickstart
 ##       section of the README.md and set/export the env vars with special attention
-##       paid to the env vars: NEO4J_HOST, ELASTIC_API_HOST, and REDIS_HOST.
+##       paid to the env vars: ELASTIC_API_HOST and REDIS_HOST.
 ##
 #################################################################################
 services:
@@ -28,7 +28,6 @@ services:
       context: .
     depends_on:
       - elasticsearch
-      - neo4j
       - redis
       - nboost
     restart: always
@@ -68,25 +67,6 @@ services:
       - '9200:9200'
       - '9300:9300'
 
-  #################################################################################
-  ##
-  ## A graph database provides query over linked data to drive indexing.
-  ##
-  #################################################################################
-  neo4j:
-    image: bitnami/neo4j:3.5.14
-    networks:
-      - dug-network
-    environment:
-      - NEO4J_PASSWORD=$NEO4J_PASSWORD
-      - NEO4J_HOST=$HOSTNAME
-    volumes:
-      - $DATA_DIR/neo4j:/bitnami
-    ports:
-      - '7474:7474'
-      - '7473:7473'
-      - '7687:7687'
-
   #################################################################################
   ##
   ## A memory cache for results of high volume service requests.

diff --git a/requirements.txt b/requirements.txt
@@ -8,7 +8,7 @@ elasticsearch==7.12.0
 flake8==3.9.0
 flasgger==0.9.4
 Flask==1.1.1
-Flask-Cors==3.0.8
+Flask-Cors==3.0.9
 Flask-RESTful==0.3.8
 gunicorn==20.0.4
 idna==2.8
@@ -17,8 +17,9 @@ Jinja2==2.11.3
 jsonschema==3.2.0
 MarkupSafe==1.1.1
 mistune==0.8.4
+pluggy==0.13.1
 pyrsistent==0.17.3
-pytest==5.4.0
+pytest==6.2.2
 pytz==2021.1
 PyYAML==5.4.1
 redis==3.4.1

diff --git a/setup.cfg b/setup.cfg
@@ -22,6 +22,7 @@ python_requires = >=3.7
 include_package_data = true
 install_requires =
     elasticsearch>=7.0.0,<8.0.0
+    pluggy
     requests
     requests_cache
     redis>=3.0.0

diff --git a/src/dug/__init__.py b/src/dug/__init__.py
@@ -1 +1 @@
-from ._version import __version__
+from ._version import __version__
diff --git a/src/dug/_version.py b/src/dug/_version.py
@@ -1 +1 @@
-__version__ = "2.0.1"
+__version__ = "2.1.0"
diff --git a/src/dug/api.py b/src/dug/api.py
@@ -1,16 +1,19 @@
 import argparse
 import json
-import jsonschema
 import logging
 import os
 import sys
 import traceback
+
+import jsonschema
 import yaml
 from flasgger import Swagger
 from flask import Flask, g, Response, request
-from flask_restful import Api, Resource
 from flask_cors import CORS
-from dug.core import Search
+from flask_restful import Api, Resource
+
+from dug.config import Config
+from dug.core.search import Search
 
 """
 Defines the semantic search API
@@ -44,14 +47,14 @@
 
 def dug ():
     if not hasattr(g, 'dug'):
-        g.search = Search ()
+        g.search = Search(Config.from_env())
     return g.search
-    
+
 class DugResource(Resource):
     """ Base class handler for Dug API requests. """
     def __init__(self):
         self.specs = {}
-        
+
     """ Functionality common to Dug services. """
     def validate (self, request, component):
         return
@@ -62,7 +65,7 @@ def validate (self, request, component):
         to_validate = self.specs["components"]["schemas"][component]
         try:
             app.logger.debug (f"--:Validating obj {json.dumps(request.json, indent=2)}")
-            app.logger.debug (f"  schema: {json.dumps(to_validate, indent=2)}")            
+            app.logger.debug (f"  schema: {json.dumps(to_validate, indent=2)}")
             jsonschema.validate(request.json, to_validate)
         except jsonschema.exceptions.ValidationError as error:
             app.logger.error (f"ERROR: {str(error)}")

diff --git a/src/dug/cli.py b/src/dug/cli.py
@@ -6,7 +6,8 @@
 import argparse
 import os
 
-from dug.core import Dug, logger
+from dug.config import Config
+from dug.core import Dug, logger, DugFactory
 
 
 class KwargParser(argparse.Action):
@@ -92,21 +93,30 @@ def get_argparser():
 
 
 def crawl(args):
-    dug = Dug()
+    config = Config.from_env()
+    factory = DugFactory(config)
+    dug = Dug(factory)
     dug.crawl(args.target, args.parser_type, args.element_type)
 
 
 def search(args):
-    dug = Dug()
+    config = Config.from_env()
+    factory = DugFactory(config)
+    dug = Dug(factory)
+    # dug = Dug()
     response = dug.search(args.target, args.query, **args.kwargs)
 
     print(response)
 
 
 def datatypes(args):
-    dug = Dug()
+    config = Config.from_env()
+    factory = DugFactory(config)
+    dug = Dug(factory)
+    # dug = Dug()
     response = dug.info(args.target, **args.kwargs)
 
+
 def status(args):
     print("Status check is not implemented yet!")
 

diff --git a/src/dug/config.py b/src/dug/config.py
@@ -1,62 +1,96 @@
 import os
-import dug.tranql as tql
-
-# Redis cache config
-redis_host = os.environ.get('REDIS_HOST', 'localhost')
-redis_port = os.environ.get('REDIS_PORT', 6379)
-redis_password = os.environ.get('REDIS_PASSWORD', '')
-
-# ElasticSearch config options
-elasticsearch_host = os.environ.get('ELASTIC_API_HOST', 'localhost')
-elasticsearch_port = os.environ.get('ELASTIC_API_PORT', 9200)
-
-# Preprocessor config that will be passed to annotate.Preprocessor constructor
-preprocessor = {
-    "debreviator": {
-        "BMI": "body mass index"
-    },
-    "stopwords": ["the"]
-}
-
-# Annotator config that will be passed to annotate.Annotator constructor
-annotator = {
-    'url': "https://api.monarchinitiative.org/api/nlp/annotate/entities?min_length=4&longest_only=false&include_abbreviation=false&include_acronym=false&include_numbers=false&content="
-}
-
-# Normalizer config that will be passed to annotate.Normalizer constructor
-normalizer = {
-    'url': "https://nodenormalization-sri.renci.org/get_normalized_nodes?curie="
-}
-
-# Synonym service config that will be passed to annotate.SynonymHelper constructor
-synonym_service = {
-    'url': "https://onto.renci.org/synonyms/"
-}
-
-# Ontology metadata helper config that will be passed to annotate.OntologyHelper constructor
-ontology_helper = {
-    'url': "https://api.monarchinitiative.org/api/bioentity/"
-}
-
-# Redlist of identifiers not to expand via TranQL
-tranql_exclude_identifiers = ["CHEBI:17336"]
-
-# TranQL queries used to expand identifiers
-tranql_source = "/graph/gamma/quick"
-tranql_queries = {
-    "disease": tql.QueryFactory(["disease", "phenotypic_feature"], tranql_source),
-    "pheno": tql.QueryFactory(["phenotypic_feature", "disease"], tranql_source),
-    "anat": tql.QueryFactory(["disease", "anatomical_entity"], tranql_source),
-    "chem_to_disease": tql.QueryFactory(["chemical_substance", "disease"], tranql_source),
-    "phen_to_anat": tql.QueryFactory(["phenotypic_feature", "anatomical_entity"], tranql_source),
-    #"anat_to_disease": tql.QueryFactory(["anatomical_entity", "disease"], tranql_source),
-    #"anat_to_pheno": tql.QueryFactory(["anatomical_entity", "phenotypic_feature"], tranql_source)
-}
-
-concept_expander = {
-    'url': "https://tranql.renci.org/tranql/query?dynamic_id_resolution=true&asynchronous=false",
-    'min_tranql_score': 0.0
-}
-
-# List of ontology types that can be used even if they fail normalization
-ontology_greenlist = ["PATO", "CHEBI", "MONDO", "UBERON", "HP", "MESH", "UMLS"]
+
+from dataclasses import dataclass, field
+
+
+TRANQL_SOURCE: str = "/graph/gamma/quick"
+
+
+@dataclass
+class Config:
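+    """
+        Settings for Dug: Elasticsearch, Redis and nboost connection details, plus options for the preprocessor, annotator, normalizer, synonym service, ontology helper and TranQL concept expansion. Use from_env() to override the connection settings from environment variables that are set.
+    """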
+    elastic_password: str = "changeme"
+    redis_password: str = "changeme"
+
+    elastic_host: str = "elasticsearch"
+    elastic_port: int = 9200
+    elastic_username: str = "elastic"
+
+    redis_host: str = "redis"
+    redis_port: int = 6379
+
+    nboost_host: str = "nboost"
+    nboost_port: int = 8000
+
+    # Preprocessor config that will be passed to annotate.Preprocessor constructor
+    preprocessor: dict = field(default_factory=lambda: {
+        "debreviator": {
+            "BMI": "body mass index"
+        },
+        "stopwords": ["the"]
+    })
+
+    # Annotator config that will be passed to annotate.Annotator constructor
+    annotator: dict = field(default_factory=lambda: {
+        "url": "https://api.monarchinitiative.org/api/nlp/annotate/entities?min_length=4&longest_only=false&include_abbreviation=false&include_acronym=false&include_numbers=false&content="
+    })
+
+    # Normalizer config that will be passed to annotate.Normalizer constructor
+    normalizer: dict = field(default_factory=lambda: {
+        "url": "https://nodenormalization-sri.renci.org/get_normalized_nodes?curie="
+    })
+
+    # Synonym service config that will be passed to annotate.SynonymHelper constructor
+    synonym_service: dict = field(default_factory=lambda: {
+        "url": "https://onto.renci.org/synonyms/"
+    })
+
+    # Ontology metadata helper config that will be passed to annotate.OntologyHelper constructor
+    ontology_helper: dict = field(default_factory=lambda: {
+        "url": "https://api.monarchinitiative.org/api/bioentity/"
+    })
+
+    # Redlist of identifiers not to expand via TranQL
+    tranql_exclude_identifiers: list = field(default_factory=lambda: ["CHEBI:17336"])
+
+    tranql_queries: dict = field(default_factory=lambda: {
+        "disease": ["disease", "phenotypic_feature"],
+        "pheno": ["phenotypic_feature", "disease"],
+        "anat": ["disease", "anatomical_entity"],
+        "chem_to_disease": ["chemical_substance", "disease"],
+        "phen_to_anat": ["phenotypic_feature", "anatomical_entity"],
+    })
+
+
+    concept_expander: dict = field(default_factory=lambda: {
+        "url": "https://tranql.renci.org/tranql/query?dynamic_id_resolution=true&asynchronous=false",
+        "min_tranql_score": 0.0
+    })
+
+    # List of ontology types that can be used even if they fail normalization
+    ontology_greenlist: list = field(default_factory=lambda: ["PATO", "CHEBI", "MONDO", "UBERON", "HP", "MESH", "UMLS"])
+
+    @classmethod
+    def from_env(cls):
+        env_vars = {
+            "elastic_host": "ELASTIC_API_HOST",
+            "elastic_port": "ELASTIC_API_PORT",
+            "elastic_username": "ELASTIC_USERNAME",
+            "elastic_password": "ELASTIC_PASSWORD",
+            "redis_host": "REDIS_HOST",
+            "redis_port": "REDIS_PORT",
+            "redis_password": "REDIS_PASSWORD",
+            "nboost_host": "NBOOST_API_HOST",
+            "nboost_port": "NBOOST_API_PORT"
+        }
+
+        kwargs = {}
+
+        for kwarg, env_var in env_vars.items():
+            env_value = os.environ.get(env_var)
+            if env_value:
+                kwargs[kwarg] = env_value
+
+        return cls(**kwargs)
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		from ._version import __version__
		from ._version import __version__