Skip to content

Commit

Permalink
Showing 40 changed files with 1,379 additions and 1,121 deletions.
7 changes: 0 additions & 7 deletions .env
Original file line number Diff line number Diff line change
@@ -1,12 +1,5 @@
DATA_DIR=./local_storage

NEO4J_PASSWORD=15707
NEO4J_HOST=neo4j
NEO4J_CPU_LIMIT=2
NEO4J_CPU_RESERVATION=1
NEO4J_MEM_LIMIT=3G
NEO4J_MEM_RESERVATION=2G

ELASTIC_PASSWORD=15707
ELASTIC_API_HOST=elasticsearch
ELASTIC_USERNAME=elastic
7 changes: 0 additions & 7 deletions .env.template
Original file line number Diff line number Diff line change
@@ -1,12 +1,5 @@
DATA_DIR=$DATA_DIR

NEO4J_PASSWORD=$RANDOM
NEO4J_HOST=neo4j
NEO4J_CPU_LIMIT=2
NEO4J_CPU_RESERVATION=1
NEO4J_MEM_LIMIT=3G
NEO4J_MEM_RESERVATION=2G

ELASTIC_PASSWORD=$RANDOM
ELASTIC_API_HOST=elasticsearch
ELASTIC_USERNAME=elastic
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -156,5 +156,4 @@ variable_file.json
monarch_results.txt
anno_fails.txt
data/elastic/
data/neo4j/
crawl/
6 changes: 3 additions & 3 deletions Jenkinsfile
Original file line number Diff line number Diff line change
@@ -17,12 +17,12 @@ pipeline {
}
stage('Publish') {
when {
branch 'develop'
tag "release-*"
}
steps {
sh '''
make build.image
make publish.image
make build
make publish
'''
}
}
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -41,7 +41,6 @@ services from outside the container (but in a shell env), run:
```shell
source .env
export $(cut -d= -f1 .env)
export NEO4J_HOST=localhost
export ELASTIC_API_HOST=localhost
export REDIS_HOST=localhost
```
Binary file added data/heal_data_dicts.tar.gz
Binary file not shown.
22 changes: 1 addition & 21 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -11,7 +11,7 @@ version: '3.0'
## NOTE: To connect to a dug service running in docker machine, from your local
## development machine, you will need to follow the steps in the Quickstart
## section of the README.md and set/export the env vars with special attention
## paid to the env vars: NEO4J_HOST, ELASTIC_API_HOST, and REDIS_HOST.
## paid to the env vars: ELASTIC_API_HOST and REDIS_HOST.
##
#################################################################################
services:
@@ -28,7 +28,6 @@ services:
context: .
depends_on:
- elasticsearch
- neo4j
- redis
- nboost
restart: always
@@ -68,25 +67,6 @@ services:
- '9200:9200'
- '9300:9300'

#################################################################################
##
## A graph database provides query over linked data to drive indexing.
##
#################################################################################
neo4j:
image: bitnami/neo4j:3.5.14
networks:
- dug-network
environment:
- NEO4J_PASSWORD=$NEO4J_PASSWORD
- NEO4J_HOST=$HOSTNAME
volumes:
- $DATA_DIR/neo4j:/bitnami
ports:
- '7474:7474'
- '7473:7473'
- '7687:7687'

#################################################################################
##
## A memory cache for results of high volume service requests.
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -8,7 +8,7 @@ elasticsearch==7.12.0
flake8==3.9.0
flasgger==0.9.4
Flask==1.1.1
Flask-Cors==3.0.8
Flask-Cors==3.0.9
Flask-RESTful==0.3.8
gunicorn==20.0.4
idna==2.8
@@ -17,8 +17,9 @@ Jinja2==2.11.3
jsonschema==3.2.0
MarkupSafe==1.1.1
mistune==0.8.4
pluggy==0.13.1
pyrsistent==0.17.3
pytest==5.4.0
pytest==6.2.2
pytz==2021.1
PyYAML==5.4.1
redis==3.4.1
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -22,6 +22,7 @@ python_requires = >=3.7
include_package_data = true
install_requires =
elasticsearch>=7.0.0,<8.0.0
pluggy
requests
requests_cache
redis>=3.0.0
2 changes: 1 addition & 1 deletion src/dug/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from ._version import __version__
from ._version import __version__
2 changes: 1 addition & 1 deletion src/dug/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "2.0.1"
__version__ = "2.1.0"
17 changes: 10 additions & 7 deletions src/dug/api.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
import argparse
import json
import jsonschema
import logging
import os
import sys
import traceback

import jsonschema
import yaml
from flasgger import Swagger
from flask import Flask, g, Response, request
from flask_restful import Api, Resource
from flask_cors import CORS
from dug.core import Search
from flask_restful import Api, Resource

from dug.config import Config
from dug.core.search import Search

"""
Defines the semantic search API
@@ -44,14 +47,14 @@

def dug():
    """Return the Search instance cached on flask's ``g``, creating it on first use.

    The instance is stored as ``g.search``. The guard must test that same
    attribute name; checking a different attribute would never find the cached
    object and would rebuild the Search client on every call.

    Returns:
        The ``Search`` object configured from the environment.
    """
    if not hasattr(g, 'search'):
        g.search = Search(Config.from_env())
    return g.search

class DugResource(Resource):
""" Base class handler for Dug API requests. """
def __init__(self):
self.specs = {}

""" Functionality common to Dug services. """
def validate (self, request, component):
return
@@ -62,7 +65,7 @@ def validate (self, request, component):
to_validate = self.specs["components"]["schemas"][component]
try:
app.logger.debug (f"--:Validating obj {json.dumps(request.json, indent=2)}")
app.logger.debug (f" schema: {json.dumps(to_validate, indent=2)}")
app.logger.debug (f" schema: {json.dumps(to_validate, indent=2)}")
jsonschema.validate(request.json, to_validate)
except jsonschema.exceptions.ValidationError as error:
app.logger.error (f"ERROR: {str(error)}")
18 changes: 14 additions & 4 deletions src/dug/cli.py
Original file line number Diff line number Diff line change
@@ -6,7 +6,8 @@
import argparse
import os

from dug.core import Dug, logger
from dug.config import Config
from dug.core import Dug, logger, DugFactory


class KwargParser(argparse.Action):
@@ -92,21 +93,30 @@ def get_argparser():


def crawl(args):
    """Run a crawl using a Dug instance configured from the environment.

    Args:
        args: Parsed CLI arguments; ``target``, ``parser_type`` and
            ``element_type`` are forwarded to ``Dug.crawl``.
    """
    # Build Dug once, through the factory; no throwaway unconfigured instance.
    config = Config.from_env()
    factory = DugFactory(config)
    dug = Dug(factory)
    dug.crawl(args.target, args.parser_type, args.element_type)


def search(args):
    """Run a search through Dug and print the response.

    Args:
        args: Parsed CLI arguments; ``target`` and ``query`` are passed
            positionally and ``kwargs`` is expanded into keyword arguments
            of ``Dug.search``.
    """
    # Build Dug once, through the factory; no throwaway unconfigured instance.
    config = Config.from_env()
    factory = DugFactory(config)
    dug = Dug(factory)
    response = dug.search(args.target, args.query, **args.kwargs)

    print(response)


def datatypes(args):
    """Query Dug for information about ``args.target``.

    Args:
        args: Parsed CLI arguments; ``target`` is passed positionally and
            ``kwargs`` is expanded into keyword arguments of ``Dug.info``.
    """
    # Build Dug once, through the factory; no throwaway unconfigured instance.
    config = Config.from_env()
    factory = DugFactory(config)
    dug = Dug(factory)
    # NOTE(review): unlike search(), the response is neither printed nor
    # returned here -- confirm whether Dug.info reports its own output.
    response = dug.info(args.target, **args.kwargs)


def status(args):
    """Placeholder for the status sub-command; only prints a notice.

    Args:
        args: Parsed CLI arguments (unused).
    """
    message = "Status check is not implemented yet!"
    print(message)

156 changes: 95 additions & 61 deletions src/dug/config.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,96 @@
import os
import dug.tranql as tql

# Redis cache config
redis_host = os.environ.get('REDIS_HOST', 'localhost')
# Environment values are always strings, while the fallback is an int; coerce
# so the port has one type regardless of where it came from.
redis_port = int(os.environ.get('REDIS_PORT', 6379))
redis_password = os.environ.get('REDIS_PASSWORD', '')

# ElasticSearch config options
elasticsearch_host = os.environ.get('ELASTIC_API_HOST', 'localhost')
# Same coercion as redis_port: int whether read from the env or defaulted.
elasticsearch_port = int(os.environ.get('ELASTIC_API_PORT', 9200))

# Preprocessor config that will be passed to annotate.Preprocessor constructor
preprocessor = {
# Maps an abbreviation to its expanded form.
"debreviator": {
"BMI": "body mass index"
},
# Words listed as stopwords (presumably dropped before annotation -- confirm
# against annotate.Preprocessor).
"stopwords": ["the"]
}

# Annotator config that will be passed to annotate.Annotator constructor
# The URL ends with "content=", so the text to annotate is presumably appended
# by the caller -- confirm against annotate.Annotator.
annotator = {
'url': "https://api.monarchinitiative.org/api/nlp/annotate/entities?min_length=4&longest_only=false&include_abbreviation=false&include_acronym=false&include_numbers=false&content="
}

# Normalizer config that will be passed to annotate.Normalizer constructor
# The URL ends with "curie=", so the identifier is presumably appended by the
# caller -- confirm against annotate.Normalizer.
normalizer = {
'url': "https://nodenormalization-sri.renci.org/get_normalized_nodes?curie="
}

# Synonym service config that will be passed to annotate.SynonymHelper constructor
synonym_service = {
'url': "https://onto.renci.org/synonyms/"
}

# Ontology metadata helper config that will be passed to annotate.OntologyHelper constructor
ontology_helper = {
'url': "https://api.monarchinitiative.org/api/bioentity/"
}

# Redlist of identifiers not to expand via TranQL
tranql_exclude_identifiers = ["CHEBI:17336"]

# TranQL queries used to expand identifiers
# Every query factory below is built against this same source path.
tranql_source = "/graph/gamma/quick"
# Each entry pairs a query name with a QueryFactory over a two-element list of
# type names; the last two entries are disabled (commented out).
tranql_queries = {
"disease": tql.QueryFactory(["disease", "phenotypic_feature"], tranql_source),
"pheno": tql.QueryFactory(["phenotypic_feature", "disease"], tranql_source),
"anat": tql.QueryFactory(["disease", "anatomical_entity"], tranql_source),
"chem_to_disease": tql.QueryFactory(["chemical_substance", "disease"], tranql_source),
"phen_to_anat": tql.QueryFactory(["phenotypic_feature", "anatomical_entity"], tranql_source),
#"anat_to_disease": tql.QueryFactory(["anatomical_entity", "disease"], tranql_source),
#"anat_to_pheno": tql.QueryFactory(["anatomical_entity", "phenotypic_feature"], tranql_source)
}

# Concept expander config: the TranQL query endpoint and a minimum score
# (presumably results scoring below it are discarded -- confirm against the
# consumer of this dict).
concept_expander = {
'url': "https://tranql.renci.org/tranql/query?dynamic_id_resolution=true&asynchronous=false",
'min_tranql_score': 0.0
}

# List of ontology types that can be used even if they fail normalization
ontology_greenlist = ["PATO", "CHEBI", "MONDO", "UBERON", "HP", "MESH", "UMLS"]

from dataclasses import dataclass, field


TRANQL_SOURCE: str = "/graph/gamma/quick"


@dataclass
class Config:
    """Runtime configuration for Dug.

    Holds connection settings for Elasticsearch, Redis and NBoost together
    with the settings dictionaries for the annotation helpers and the TranQL
    concept expansion. Every field has a default, so ``Config()`` is usable
    as-is; ``Config.from_env()`` overrides the connection settings from
    environment variables.
    """
    elastic_password: str = "changeme"
    redis_password: str = "changeme"

    elastic_host: str = "elasticsearch"
    elastic_port: int = 9200
    elastic_username: str = "elastic"

    redis_host: str = "redis"
    redis_port: int = 6379

    nboost_host: str = "nboost"
    nboost_port: int = 8000

    # Preprocessor config that will be passed to annotate.Preprocessor constructor
    preprocessor: dict = field(default_factory=lambda: {
        "debreviator": {
            "BMI": "body mass index"
        },
        "stopwords": ["the"]
    })

    # Annotator config that will be passed to annotate.Annotator constructor
    annotator: dict = field(default_factory=lambda: {
        "url": "https://api.monarchinitiative.org/api/nlp/annotate/entities?min_length=4&longest_only=false&include_abbreviation=false&include_acronym=false&include_numbers=false&content="
    })

    # Normalizer config that will be passed to annotate.Normalizer constructor
    normalizer: dict = field(default_factory=lambda: {
        "url": "https://nodenormalization-sri.renci.org/get_normalized_nodes?curie="
    })

    # Synonym service config that will be passed to annotate.SynonymHelper constructor
    synonym_service: dict = field(default_factory=lambda: {
        "url": "https://onto.renci.org/synonyms/"
    })

    # Ontology metadata helper config that will be passed to annotate.OntologyHelper constructor
    ontology_helper: dict = field(default_factory=lambda: {
        "url": "https://api.monarchinitiative.org/api/bioentity/"
    })

    # Redlist of identifiers not to expand via TranQL
    tranql_exclude_identifiers: list = field(default_factory=lambda: ["CHEBI:17336"])

    # Named TranQL expansion queries, each given as a list of type names.
    tranql_queries: dict = field(default_factory=lambda: {
        "disease": ["disease", "phenotypic_feature"],
        "pheno": ["phenotypic_feature", "disease"],
        "anat": ["disease", "anatomical_entity"],
        "chem_to_disease": ["chemical_substance", "disease"],
        "phen_to_anat": ["phenotypic_feature", "anatomical_entity"],
    })

    # Concept expander config: TranQL query endpoint and minimum score.
    concept_expander: dict = field(default_factory=lambda: {
        "url": "https://tranql.renci.org/tranql/query?dynamic_id_resolution=true&asynchronous=false",
        "min_tranql_score": 0.0
    })

    # List of ontology types that can be used even if they fail normalization
    ontology_greenlist: list = field(default_factory=lambda: ["PATO", "CHEBI", "MONDO", "UBERON", "HP", "MESH", "UMLS"])

    @classmethod
    def from_env(cls):
        """Build a Config, overriding connection settings from the environment.

        Unset or empty environment variables leave the field's default in
        place. Port variables are converted to ``int`` so the resulting
        fields match their declared type (environment values are always
        strings).

        Returns:
            A new ``Config`` instance.

        Raises:
            ValueError: If a port variable is set to a non-integer value.
        """
        # field name -> (environment variable, converter)
        env_vars = {
            "elastic_host": ("ELASTIC_API_HOST", str),
            "elastic_port": ("ELASTIC_API_PORT", int),
            "elastic_username": ("ELASTIC_USERNAME", str),
            "elastic_password": ("ELASTIC_PASSWORD", str),
            "redis_host": ("REDIS_HOST", str),
            "redis_port": ("REDIS_PORT", int),
            "redis_password": ("REDIS_PASSWORD", str),
            "nboost_host": ("NBOOST_API_HOST", str),
            "nboost_port": ("NBOOST_API_PORT", int),
        }

        kwargs = {}

        for kwarg, (env_var, convert) in env_vars.items():
            env_value = os.environ.get(env_var)
            if not env_value:
                continue
            try:
                kwargs[kwarg] = convert(env_value)
            except ValueError as err:
                raise ValueError(
                    f"Environment variable {env_var} must be an integer, "
                    f"got {env_value!r}"
                ) from err

        return cls(**kwargs)
Loading

0 comments on commit b1b7204

Please sign in to comment.