Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
Stephen Ramsey committed Nov 14, 2024
1 parent dbeeb16 commit 104763e
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 6 deletions.
12 changes: 12 additions & 0 deletions build/Snakefile-conversion
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,18 @@ rule ChEMBL_Conversion:
shell:
config['PYTHON_COMMAND'] + " {input.code} " + config['MYSQL_CONF'] + " " + config['CHEMBL_MYSQL_DBNAME'] + " {output.nodes} {output.edges} " + config['TEST_ARG'] + " > {log} 2>&1"

# Convert the UNII TSV file into a KG2 nodes JSON-lines file.
# Only a nodes file is declared as output; no edges file is produced by this rule.
rule UNII_Conversion:
input:
code = config['UNII_CONVERSION_SCRIPT'],
real = config['UNII_TSV_FILE'],
# Declared as an input but not referenced in the shell command below.
validation = config['VALIDATION_PLACEHOLDER']
output:
nodes = config['UNII_OUTPUT_NODES_FILE']
log:
config['UNII_CONVERSION_LOG']
shell:
# Command line: <python> <script> <input tsv> <output nodes> <test arg>; stdout and stderr both go to the log.
config['PYTHON_COMMAND'] + " {input.code} {input.real} {output.nodes} " + config['TEST_ARG'] + " > {log} 2>&1"

rule NCBIGene_Conversion:
input:
code = config['NCBIGENE_CONVERSION_SCRIPT'],
Expand Down
1 change: 1 addition & 0 deletions build/Snakefile-post-etl
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ rule Merge:
disgenet_edges = config['DISGENET_OUTPUT_EDGES_FILE'],
kegg_nodes = config['KEGG_OUTPUT_NODES_FILE'],
kegg_edges = config['KEGG_OUTPUT_EDGES_FILE'],
unii_nodes = config['UNII_OUTPUT_NODES_FILE'],
clinicaltrialskg_nodes = config['CLINICALTRIALSKG_OUTPUT_NODES_FILE'],
clinicaltrialskg_edges = config['CLINICALTRIALSKG_OUTPUT_EDGES_FILE']
output:
Expand Down
10 changes: 10 additions & 0 deletions build/snakemake-config-var.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,16 @@ ncbigene_conversion_log: ${BUILD_DIR}/${ncbigene_conversion_base}${version_suffi
ncbigene_output_nodes_file: ${BUILD_DIR}/${ncbigene_output_base}${nodes_suffix}${test_suffix}.jsonl
ncbigene_output_edges_file: ${BUILD_DIR}/${ncbigene_output_base}${edges_suffix}${test_suffix}.jsonl

# UNII settings: extraction script and log, the extracted TSV path, and the
# conversion script, log and output. Only a nodes output file is configured
# here (there is no unii_output_edges_file key).
unii_extraction_base: extract-unii
unii_conversion_base: unii_tsv_to_kg_jsonl
unii_output_base: kg2-unii
unii_extraction_script: ${EXTRACT_CODE_DIR}/${unii_extraction_base}.sh
unii_extraction_log: ${BUILD_DIR}/${unii_extraction_base}${version_suffix}${test_suffix}.log
unii_tsv_file: ${BUILD_DIR}/unii/unii.tsv
unii_conversion_script: ${CONVERT_CODE_DIR}/${unii_conversion_base}.py
unii_conversion_log: ${BUILD_DIR}/${unii_conversion_base}${version_suffix}${test_suffix}.log
unii_output_nodes_file: ${BUILD_DIR}/${unii_output_base}${nodes_suffix}${test_suffix}.jsonl

dgidb_extraction_base: extract-dgidb
dgidb_conversion_base: dgidb_tsv_to_kg_jsonl
dgidb_output_base: kg2-dgidb
Expand Down
16 changes: 12 additions & 4 deletions kg2_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@
CURIE_PREFIX_UMLS_STY = 'STY'
CURIE_PREFIX_UMLS_SOURCE = 'umls_source'
CURIE_PREFIX_UNICHEM_SOURCE = 'UNICHEM_source'
CURIE_PREFIX_UNII = 'UNII'
CURIE_PREFIX_UNIPROT = 'UniProtKB'
CURIE_PREFIX_VANDF = 'VANDF'

Expand Down Expand Up @@ -178,6 +179,7 @@
BASE_URL_UMLS = BASE_BASE_URL_IDENTIFIERS_ORG + 'umls:'
BASE_URL_UMLS_STY = 'http://purl.bioontology.org/ontology/STY/'
BASE_URL_UNICHEM = 'https://www.ebi.ac.uk/unichem/'
BASE_URL_UNII = 'https://precision.fda.gov/uniisearch/srs/unii/'
BASE_URL_UNIPROTKB = 'http://purl.uniprot.org/uniprot/'

BIOLINK_CATEGORY_ANATOMICAL_ENTITY = 'anatomical entity'
Expand Down Expand Up @@ -352,14 +354,20 @@ def close_single_jsonlines(info: tuple, output_file_name: str):
temp_output_file.close()


def create_kg2_jsonlines(test_mode: bool = False, include_edges: bool = True):
    """Open the JSON-lines writer(s) used by a KG2 conversion script.

    Args:
        test_mode: Passed through unchanged to ``create_single_jsonlines``.
        include_edges: When true (the default), a second writer is opened for
            edges. When false, no edges writer is created; this suits
            sources that emit only nodes.

    Returns:
        A 2-tuple ``(nodes_info, edges_info)``. Each element is whatever
        ``create_single_jsonlines`` returns, except that ``edges_info`` is
        ``None`` when ``include_edges`` is false. Pass both to
        ``close_kg2_jsonlines`` when writing is finished.
    """
    # The nodes writer is always created first, matching the original order.
    nodes_info = create_single_jsonlines(test_mode)
    edges_info = create_single_jsonlines(test_mode) if include_edges else None
    return nodes_info, edges_info


def close_kg2_jsonlines(nodes_info: tuple, edges_info: tuple,
                        output_nodes_file_name: str, output_edges_file_name: str):
    """Close the nodes writer and, if one was opened, the edges writer.

    Args:
        nodes_info: Nodes writer handle, as returned in the first position by
            ``create_kg2_jsonlines``.
        edges_info: Edges writer handle, or ``None`` when no edges writer was
            created (``create_kg2_jsonlines(..., include_edges=False)``).
        output_nodes_file_name: Passed to ``close_single_jsonlines`` together
            with ``nodes_info``.
        output_edges_file_name: Passed to ``close_single_jsonlines`` together
            with ``edges_info``; not used when ``edges_info`` is ``None``.
    """
    close_single_jsonlines(nodes_info, output_nodes_file_name)
    # Close the edges writer exactly once, and only when it exists. An
    # unconditional call here would fail for nodes-only sources and would
    # close the edges writer twice for everything else.
    if edges_info is not None:
        close_single_jsonlines(edges_info, output_edges_file_name)


def start_read_jsonlines(file_name: str, type=dict):
Expand Down Expand Up @@ -732,4 +740,4 @@ def is_a_valid_http_url(id: str) -> bool:
valid = id.startswith('http://') or id.startswith('https://')
except validators.ValidationFailure:
valid = False
return valid
return valid
2 changes: 2 additions & 0 deletions maps/curies-to-urls-map.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,8 @@ use_for_bidirectional_mapping:
UMLSSC: http://purl.bioontology.org/ontology/STY/
-
UNICHEM_source: "https://www.ebi.ac.uk/unichem/"
-
UNII: "https://precision.fda.gov/uniisearch/srs/unii/"
-
UniProtKB: "http://purl.uniprot.org/uniprot/"
-
Expand Down
5 changes: 3 additions & 2 deletions setup/setup-kg2-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ sudo apt-get install -y \
automake \
git \
libssl-dev \
make
make \
unzip

sudo debconf-set-selections <<< "mysql-server mysql-server/root_password password ${mysql_password}"
sudo debconf-set-selections <<< "mysql-server mysql-server/root_password_again password ${mysql_password}"
Expand Down Expand Up @@ -157,4 +158,4 @@ setup_kg2_build_part2 >> ${setup_log_file} 2>&1
if [[ "${build_flag}" != "ci" ]]
then
${s3_cp_cmd} ${setup_log_file} s3://${s3_bucket_versioned}/
fi
fi

0 comments on commit 104763e

Please sign in to comment.