Skip to content

Commit

Permalink
Merge pull request #156 from helxplatform/release/2.2.0
Browse files Browse the repository at this point in the history
Release 2.2.0
  • Loading branch information
cschreep authored Jun 10, 2021
2 parents b1b7204 + f2e7379 commit 77bdbca
Show file tree
Hide file tree
Showing 23 changed files with 17,187 additions and 94 deletions.
9 changes: 7 additions & 2 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
db
# Dug stuff
db/
crawl/
local_storage/
anno_fails.txt
dug.log

# Python stuff
**/__pycache__
Expand All @@ -19,4 +24,4 @@ tests/.pytest_cache
# Build artifacts
build/
dist/
**/*.egg-info
**/*.egg-info
15 changes: 12 additions & 3 deletions Jenkinsfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
pipeline {
agent any
agent {
kubernetes {
cloud 'kubernetes'
label 'agent-docker'
defaultContainer 'agent-docker'
}
}
stages {
stage('Install') {
steps {
Expand All @@ -17,11 +23,14 @@ pipeline {
}
stage('Publish') {
when {
tag "release-*"
buildingTag()
}
environment {
DOCKERHUB_CREDS = credentials('rencibuild_dockerhub_machine_user')
}
steps {
sh '''
make build
echo $DOCKERHUB_CREDS_PSW | docker login -u $DOCKERHUB_CREDS_USR --password-stdin
make publish
'''
}
Expand Down
49 changes: 8 additions & 41 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,52 +29,19 @@ install:
${PYTHON} -m pip install -r requirements.txt
${PYTHON} -m pip install .

#test.lint: Run flake8 on the source code
test.lint:
${PYTHON} -m flake8 src

#test.doc: Run doctests in the source code
test.doc:
${PYTHON} -m pytest --doctest-modules src

#test.unit: Run unit tests
test.unit:
${PYTHON} -m pytest tests/unit

#test: Run all tests
test: test.doc test.unit

#build: Build wheel and source distribution packages
build.python:
echo "Building distribution packages for version $(VERSION)"
${PYTHON} -m pip install --upgrade build
${PYTHON} -m build --sdist --wheel .
echo "Successfully built version $(VERSION)"
test:
# ${PYTHON} -m flake8 src
${PYTHON} -m pytest --doctest-modules src
${PYTHON} -m pytest tests

#build.image: Build the Docker image
build.image:
#build: Build Docker image
build:
echo "Building docker image: ${DOCKER_IMAGE}"
docker build -t ${DOCKER_IMAGE} -f Dockerfile .
echo "Successfully built: ${DOCKER_IMAGE}"

build.image.test:
echo "Testing dockerfile"

#build: Build Python artifacts and Docker image
build: build.python build.image build.image.test

#all: Alias to clean, install, test, build, and image
all: clean install test build

#publish.image: Push the Docker image
publish.image:
#publish: Build and push docker image
publish: build
docker tag ${DOCKER_IMAGE} ${DOCKER_REPO}/${DOCKER_IMAGE}
docker push ${DOCKER_REPO}/${DOCKER_IMAGE}

#publish.python: Push the build artifacts to PyPI
publish.python:
echo "publishing wheel..."
echo "publishing source..."

#publish: Push all build artifacts to appropriate repositories
publish: publish.python publish.image
847 changes: 847 additions & 0 deletions data/topmed_tags_v2.0.json

Large diffs are not rendered by default.

15,912 changes: 15,912 additions & 0 deletions data/topmed_variables_v2.0.csv

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ services:
"--workers=$API_WORKERS", "--name=dug",
"--bind=0.0.0.0:$API_PORT", "--timeout=$API_TIMEOUT",
"--log-level=DEBUG", "--enable-stdio-inheritance", "--reload", "dug.api:app" ]
volumes:
- ./src:/home/dug/dug/
ports:
- $API_PORT:$API_PORT

Expand Down
3 changes: 3 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,8 @@
[pytest]
addopts =
-p no:cacheprovider
markers =
api: mark a test as an api test
cli: mark a test as a cli test
testpaths =
tests
2 changes: 1 addition & 1 deletion src/dug/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "2.1.0"
__version__ = "2.2.0"
25 changes: 14 additions & 11 deletions src/dug/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,15 +272,18 @@ def post(self):
api.add_resource(DugSearchVarResource, '/search_var')
api.add_resource(DugAggDataType, '/agg_data_types')

def main(args=None):
    """Parse command-line options and start the Dug search API server.

    Args:
        args: Optional list of argument strings to parse. When ``None``,
            argparse falls back to the process command line, so the function
            can be driven both from tests and from the ``__main__`` guard.
    """
    parser = argparse.ArgumentParser(description='Dug Search API')
    parser.add_argument('-p', '--port', type=int, help='Port to run service on.', default=5551)
    parser.add_argument('-d', '--debug', help="Debug log level.", default=False, action='store_true')
    args = parser.parse_args(args)

    # Configure logging: raise verbosity only when --debug was requested.
    # NOTE(review): the flattened source does not show nesting; basicConfig is
    # assumed to belong to the `if args.debug` branch -- confirm against the file.
    if args.debug:
        logging.basicConfig(level=logging.DEBUG)

    logger.info(f"starting dug on port={args.port} with debug={args.debug}")
    # Listen on all interfaces (0.0.0.0) on the requested port.
    app.run(host='0.0.0.0', port=args.port, debug=args.debug, threaded=True)

if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Dug Search API')
parser.add_argument('-p', '--port', type=int, help='Port to run service on.', default=5551)
parser.add_argument('-d', '--debug', help="Debug log level.", default=False, action='store_true')
args = parser.parse_args ()

""" Configure """
if args.debug:
debug = True
logging.basicConfig(level=logging.DEBUG)
logger.info (f"starting dug on port={args.port} with debug={args.debug}")
app.run(host='0.0.0.0', port=args.port, debug=args.debug, threaded=True)
main()
4 changes: 2 additions & 2 deletions src/dug/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,11 @@ def status(args):
print("Status check is not implemented yet!")


def main():
def main(args=None):

arg_parser = get_argparser()

args = arg_parser.parse_args()
args = arg_parser.parse_args(args)

try:
logger.setLevel(args.log_level)
Expand Down
2 changes: 1 addition & 1 deletion src/dug/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class Config:

# Normalizer config that will be passed to annotate.Normalizer constructor
normalizer: dict = field(default_factory=lambda: {
"url": "https://nodenormalization-sri.renci.org/get_normalized_nodes?curie="
"url": "https://nodenormalization-sri.renci.org/1.1/get_normalized_nodes?curie="
})

# Synonym service config that will be passed to annotate.SynonymHelper constructor
Expand Down
12 changes: 9 additions & 3 deletions src/dug/core/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,10 +352,16 @@ def make_request(self, curie: str, http_session: Session):
url = f"{self.url}{urllib.parse.quote(curie)}"

try:
response = http_session.get(url).json()
return response
response = http_session.get(url)
if response.status_code == 400:
logger.error(f"No synonyms returned for: `{curie}`. Validation error.")
return []
if response.status_code == 500:
logger.error(f"No synonyms returned for: `{curie}`. Internal server error from {self.url}.")
return []
return response.json()
except json.decoder.JSONDecodeError as e:
logger.error(f"No synonyms returned for: {curie}")
logger.error(f"Json parse error for response from `{url}`. Exception: {str(e)}")
return []

def handle_response(self, curie: str, raw_synonyms: List[dict]) -> List[str]:
Expand Down
2 changes: 1 addition & 1 deletion src/dug/core/parsers/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def set_search_terms(self):
# Traverse set of identifiers to determine set of search terms
search_terms = self.search_terms
for ident_id, ident in self.identifiers.items():
search_terms.extend([ident.label, ident.description] + ident.search_text + ident.synonyms)
search_terms.extend(ident.search_text + ident.synonyms)
self.search_terms = list(set(search_terms))

def set_optional_terms(self):
Expand Down
14 changes: 8 additions & 6 deletions src/dug/core/parsers/topmed_tag_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,14 @@ def __call__(self, input_file: InputFile) -> List[Indexable]:
reader = csv.DictReader(csvfile, delimiter='\t')
for row in reader:
row = {k.strip(): v for k, v in row.items()}
elem = DugElement(elem_id=row['variable_full_accession'],
name=row['variable_name'] if 'variable_name' in row else row['variable_full_accession'],
desc=row['variable_desc'] if 'variable_name' in row else row['variable_full_accession'],
elem_type="DbGaP",
collection_id=row['study_full_accession'],
collection_name=row['study_name'])
elem = DugElement(
elem_id=row['variable_full_accession'],
name=row['variable_name'] if 'variable_name' in row else row['variable_full_accession'],
desc=row['variable_description'] if 'variable_description' in row else row['variable_full_accession'],
elem_type="DbGaP",
collection_id=row['study_full_accession'],
collection_name=row['study_name']
)

# Create DBGaP links as study/variable actions
elem.collection_action = utils.get_dbgap_study_link(study_id=elem.collection_id)
Expand Down
99 changes: 91 additions & 8 deletions src/dug/core/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,16 +153,99 @@ def update_doc(self, index, doc, doc_id):

def search_concepts(self, index, query, offset=0, size=None, fuzziness=1, prefix_length=3):
"""
Changed to query_string for and/or and exact matches with quotations.
Changed to a long boolean match query to optimize search results
"""
query = {
'query_string': {
'query': query,
'fuzziness': fuzziness,
'fuzzy_prefix_length': prefix_length,
'fields': ["name", "description", "search_terms", "optional_terms"],
'quote_field_suffix': ".exact"
},
"bool": {
"should": [
{
"match_phrase": {
"name": {
"query": query,
"boost": 10
}
}
},
{
"match_phrase": {
"description": {
"query": query,
"boost": 6
}
}
},
{
"match_phrase": {
"search_terms": {
"query": query,
"boost": 8
}
}
},
{
"match": {
"name": {
"query": query,
"fuzziness": fuzziness,
"prefix_length": prefix_length,
"operator": "and",
"boost": 4
}
}
},
{
"match": {
"search_terms": {
"query": query,
"fuzziness": fuzziness,
"prefix_length": prefix_length,
"operator": "and",
"boost": 5
}
}
},
{
"match": {
"description": {
"query": query,
"fuzziness": fuzziness,
"prefix_length": prefix_length,
"operator": "and",
"boost": 3
}
}
},
{
"match": {
"description": {
"query": query,
"fuzziness": fuzziness,
"prefix_length": prefix_length,
"boost": 2
}
}
},
{
"match": {
"search_terms": {
"query": query,
"fuzziness": fuzziness,
"prefix_length": prefix_length,
"boost": 1
}
}
},
{
"match": {
"optional_terms": {
"query": query,
"fuzziness": fuzziness,
"prefix_length": prefix_length
}
}
}
]
}
}
body = json.dumps({'query': query})
total_items = self.es.count(body=body, index=index)
Expand Down
13 changes: 13 additions & 0 deletions tests/integration/data/test_tags_v2.0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[{
"model": "tags.tag",
"pk": 51,
"fields": {
"created": "2018-03-13t18:39:53z",
"modified": "2018-05-15t17:50:57z",
"title": "vte",
"lower_title": "vte",
"description": "qualitative indicator of venous thromboembolism (vte) status",
"instructions": "include variables that represent whether or when a subject has experienced vte. vte includes both deep venous thrombosis and pulmonary embolism. vte does not include arterial thrombosis or arterial embolism. include variables that represent prevalent and/or incident vte status. include variables that represent vte event data, including event status and time-to-event variables. include variables that indicate age at vte events, e.g. self-reported age at a vte event or followup time to a vte event. include variables indicating vte status based on any method of determination, including self report, adjudication, clinical verification, medical records, icd codes, or calculation/inference from related traits. include all instances of duplicated variables, e.g. vte variables included in multiple datasets. include variables indicating deep vein thrombosis (dvt) status, pulmonary embolism status, and vte status of unknown type. include variables that specify vte type. do not include variables that indicate possible vte treatments, e.g. surgical procedures. do not include variables that indicate medication use related to vte, e.g. anticoagulant or thrombolytic use.",
"creator": 7
}
}]
Loading

0 comments on commit 77bdbca

Please sign in to comment.