diff --git a/dags/roger/config/__init__.py b/dags/roger/config/__init__.py index ac9eb23..8cb1024 100644 --- a/dags/roger/config/__init__.py +++ b/dags/roger/config/__init__.py @@ -99,6 +99,21 @@ class AnnotationConfig(DictLike): "sapbert": { "classification_url": "https://med-nemo.apps.renci.org/annotate/", "annotator_url": "https://babel-sapbert.apps.renci.org/annotate/", + "score_threshold": 0.8, + "bagel": { + "enabled": False, + "url": "https://bagel.apps.renci.org/group_synonyms_openai", + "prompt": "bagel/ask_classes", + "llm_args": { + "llm_model_name": "gpt-4o-2024-05-13", + "organization": "", + "access_key": "", + "llm_model_args": { + "top_p": 0, + "temperature": 0.1 + } + } + } }, } ) diff --git a/dags/roger/config/config.yaml b/dags/roger/config/config.yaml index e9402ce..86c95ad 100644 --- a/dags/roger/config/config.yaml +++ b/dags/roger/config/config.yaml @@ -1,6 +1,6 @@ redisgraph: username: "" - password: "12345" + password: "weak" host: localhost graph: test port: 6379 @@ -42,13 +42,25 @@ bulk_loader: annotation: clear_http_cache: false - annotator_type: monarch + annotator_type: sapbert annotator_args: monarch: url: "https://api.monarchinitiative.org/api/nlp/annotate/entities?min_length=4&longest_only=false&include_abbreviation=false&include_acronym=false&include_numbers=false&content=" sapbert: classification_url: "https://med-nemo.apps.renci.org/annotate/" - annotator_url: "https://babel-sapbert.apps.renci.org/annotate/" + annotator_url: "https://sap-qdrant.apps.renci.org/annotate/" + score_threshold: 0.5 + bagel: + enabled: false + url: "http://localhost:9099/group_synonyms_openai" + prompt: "bagel/ask_classes" + llm_args: + llm_model_name: "gpt-4o-2024-05-13" + organization: + access_key: + llm_model_args: + top_p: 0 + temperature: 0.1 normalizer: "https://nodenormalization-dev.apps.renci.org/get_normalized_nodes?conflate=false&description=true&curie=" synonym_service: "https://name-resolution-sri.renci.org/reverse_lookup" ontology_metadata: "https://api.monarchinitiative.org/api/bioentity/" @@ -93,9 +105,9 @@ indexing: action: "files" elasticsearch: - host: elasticsearch + host: localhost username: elastic - password: "" + password: "12345" nboost_host: "" scheme: "http" ca_path: "" diff --git a/dags/roger/pipelines/bdc_pipelines.py b/dags/roger/pipelines/bdc_pipelines.py new file mode 100644 index 0000000..bf48392 --- /dev/null +++ b/dags/roger/pipelines/bdc_pipelines.py @@ -0,0 +1,50 @@ +"Dug pipeline for dbGaP data set" + +from roger.pipelines import DugPipeline + +class BIOLINCCdbGaPPipeline(DugPipeline): + "Pipeline for the dbGaP data set" + pipeline_name = 'biolincc' + parser_name = 'biolincc' + + +class covid19dbGaPPipeline(DugPipeline): + "Pipeline for the dbGaP data set" + pipeline_name = 'covid19-dbgap' + parser_name = 'covid19' + +class dirDbGaPPipeline(DugPipeline): + pipeline_name = "dir-dbgap" + parser_name = "dir" + +class LungMapDbGaPPipeline(DugPipeline): + pipeline_name = "lungmap-dbgap" + parser_name = "lungmap" + +class nsrrDbGaPPipeline(DugPipeline): + pipeline_name = "nsrr-dbgap" + parser_name = "nsrr" + +class ParentDbGaPPipeline(DugPipeline): + pipeline_name = "parent-dbgap" + parser_name = "parent" + +class PCGCDbGaPPipeline(DugPipeline): + pipeline_name = "pcgc-dbgap" + parser_name = "pcgc" + +class RecoverDbGaPPipeline(DugPipeline): + pipeline_name = "recover-dbgap" + parser_name = "recover" + +class TopmedDBGaPPipeline(DugPipeline): + pipeline_name = "topmed-gen3-dbgap" + parser_name = "topmeddbgap" + +class CureSCPipeline(DugPipeline): + pipeline_name = "curesc-dbgap" + parser_name = "curesc" + +class SmallDataDbGap(DugPipeline): + pipeline_name = "small-data-dbgap" + parser_name = "topmeddbgap" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index d1b1f68..44c9a39 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ jsonpickle redisgraph-bulk-loader==0.12.3 pytest PyYAML -git+https://github.com/helxplatform/dug@2.13.1 +git+https://github.com/helxplatform/dug@develop orjson kg-utils==0.0.6 bmt==1.1.0