Skip to content

Commit

Permalink
Merge pull request #27 from microbiomedata/25-get-a-report-of-envo-te…
Browse files Browse the repository at this point in the history
…rms-with-ids-in-the-range-of-nmdc-or-nmdc-contributors

data-vs-ontology-reports
  • Loading branch information
turbomam authored Feb 15, 2024
2 parents 4e0cb82 + d4654a9 commit 342a54d
Show file tree
Hide file tree
Showing 13 changed files with 85 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
src/ontology/nmdco-redundant.ttl
local/
downloads/
**/__pycache__/

.DS_Store
Expand Down
45 changes: 45 additions & 0 deletions data-vs-ontology-reports.Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Command prefix placed in front of the report-generating commands below
# (report-id-ranges, report-instantiated-traids) so they run via Poetry.
RUN = poetry run

# Both names are commands, not files on disk, so declare them phony.
.PHONY: data-vs-ontology-all
.PHONY: data-vs-ontology-clean


# Aggregate goal: wipe previous outputs, then regenerate both committed reports.
# Prerequisites accumulate across the lines below in the order written, with
# the clean step first.
# NOTE(review): that ordering is only honoured by a serial run; under `make -j`
# the clean step may overlap with report generation -- confirm this goal is
# never built in parallel.
data-vs-ontology-all: data-vs-ontology-clean
data-vs-ontology-all: data-vs-ontology-reports/envo-id-ranges-report.tsv
data-vs-ontology-all: data-vs-ontology-reports/fma-usage-report.tsv

# Empty the two working directories while keeping them (and their .gitkeep
# placeholder files) in place.
#   * data-vs-ontology-reports/ : every non-hidden entry is removed.
#   * downloads/                : only entries whose name contains "owl" are removed.
# Each directory is handled by one shell command chain; a failing step stops
# the chain, and therefore the recipe, exactly as separate recipe lines would.
data-vs-ontology-clean:
	rm -rf data-vs-ontology-reports/* && \
		mkdir -p data-vs-ontology-reports && \
		touch data-vs-ontology-reports/.gitkeep
	rm -rf downloads/*owl* && \
		mkdir -p downloads && \
		touch downloads/.gitkeep

# Upstream location of the ENVO ID-range declarations.
ENVO_IDRANGES_URL := https://raw.githubusercontent.com/EnvironmentOntology/envo/master/src/envo/envo-idranges.owl

# Fetch the ENVO ID-range file, preferring wget and falling back to curl.
#
# The tool is chosen when the makefile is parsed (the ifeq below is a make
# directive, which is why it must not be tab-indented).
#
# The payload is written to "$@.tmp" and only renamed onto the target after a
# successful transfer, so an interrupted or failed download can never leave a
# truncated file that make would later treat as up to date.  curl is run with
# --fail so that an HTTP error (e.g. 404) is reported as a failure instead of
# the error page being saved as the ontology; --location follows redirects.
downloads/envo-idranges.owl.omn:
	@echo "Downloading..."
	@mkdir -p $(@D)
ifeq ($(shell command -v wget 2> /dev/null),)
	@echo "wget is not installed, trying with curl..."
	@curl --fail --location -o $@.tmp $(ENVO_IDRANGES_URL) || { rm -f $@.tmp; exit 1; }
else
	@echo "Downloading with wget..."
	@wget -O $@.tmp $(ENVO_IDRANGES_URL) || { rm -f $@.tmp; exit 1; }
endif
	@mv -f $@.tmp $@

# Run ROBOT's `convert` command on the downloaded ID-range file ($<) to produce
# the .ttl target ($@).  `robot` is invoked directly from PATH, not through
# $(RUN).
downloads/envo-idranges.owl.ttl: downloads/envo-idranges.owl.omn
	@echo "Converting..."
	@robot convert \
		--input $< \
		--output $@

# Build the ENVO ID-range report by passing the converted Turtle file ($<) to
# the report-id-ranges command and writing its output to the target ($@).
data-vs-ontology-reports/envo-id-ranges-report.tsv: downloads/envo-idranges.owl.ttl
	@echo "Generating report..."
	$(RUN) report-id-ranges --id-ranges-ttl $< --output $@

# Run report-instantiated-traids, which is handed two output paths:
#   --output         the target itself ($@)
#   --counts-output  a sibling path derived from the target by rewriting the
#                    trailing "counts.tsv" to "report-counts.tsv"
# The rule has no prerequisites, so it only runs when the target is missing.
# NOTE(review): the second file is a side output that no rule declares as a
# target -- confirm nothing else needs to depend on it.
data-vs-ontology-reports/biosample-triad-counts.tsv:
	@echo "Generating report..."
	$(RUN) report-instantiated-traids --output $@ --counts-output $(patsubst %counts.tsv,%report-counts.tsv,$@)

# Extract every line of the biosample triad report that mentions an FMA term.
#
# grep exits with status 1 when nothing matches; that is a legitimate outcome
# here (no FMA usage -> empty report), so only other non-zero statuses (real
# errors such as an unreadable input) are treated as failures.  The result is
# staged in "$@.tmp" and renamed on success so a failed run never leaves an
# empty target that looks up to date.
data-vs-ontology-reports/fma-usage-report.tsv: data-vs-ontology-reports/biosample-triad-counts.tsv
	grep 'FMA:' $< > $@.tmp || [ $$? -eq 1 ] || { rm -f $@.tmp; exit 2; }
	mv -f $@.tmp $@
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
33 changes: 33 additions & 0 deletions data-vs-ontology-reports/fma-usage-report.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
nmdc:bsm-11-x80t5771 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-1zsths21 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-pqgnra85 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-aa7wmf63 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-vxy31194 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-adv25093 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-xhpt1920 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-afygsz12 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-f0y72191 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-96cg5y52 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-4fnr9v33 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-y16b7q98 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-8vae2844 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-2xw6s444 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-y4310d70 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-e5n81610 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-dcw37a90 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-spjbg416 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-4ppwq475 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-zxjp0468 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-05fh0s26 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-jxa03j76 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-nvz3sq09 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-sbrsd510 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-5aysb633 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-yvnp5859 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-7yq6an11 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-qhewdm80 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-p9t9wv58 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-dzmzp451 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-datvdm83 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-vh4jjb52 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-0q1w4832 ENVO:01001002 FMA:14541 FMA:14541
Empty file added downloads/.gitkeep
Empty file.
21 changes: 0 additions & 21 deletions maintenance.Makefile

This file was deleted.

Binary file removed nmdc_ontology/__pycache__/__init__.cpython-310.pyc
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion nmdc_ontology/report_id_ranges.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


@click.command()
@click.option("--id-ranges-ttl", "-i", default="local/envo-idranges.owl.ttl", help="Input file path")
@click.option("--id-ranges-ttl", "-i", default="downloads/envo-idranges.owl.ttl", help="Input file path")
@click.option("--output", "-o", default="envo_id_ranges_report.tsv", help="Output file path")
def generate_id_ranges(id_ranges_ttl, output):
# Load the Turtle content into an RDF graph
Expand Down
10 changes: 5 additions & 5 deletions nmdc_ontology/report_instantiated_traids.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
@click.option('--api-url',
default="https://api.microbiomedata.org/nmdcschema/biosample_set?max_page_size=9999&projection=env_broad_scale%2Cenv_local_scale%2Cenv_medium",
help='URL of the API endpoint')
@click.option('--output-file', default="biosample_triad_report.tsv", help='Output file name')
@click.option('--counts-output-file', default="triad_term_counts.tsv", help='Output file name for value counts')
def main(api_url, output_file, counts_output_file):
@click.option('--output', default="biosample_triad_report.tsv", help='Output file name')
@click.option('--counts-output', default="triad_term_counts.tsv", help='Output file name for value counts')
def main(api_url, output, counts_output):
# Send the GET request
response = requests.get(api_url)

Expand Down Expand Up @@ -39,7 +39,7 @@ def main(api_url, output_file, counts_output_file):
df = pd.DataFrame(rows)

# Save the DataFrame to a TSV file
df.to_csv(output_file, sep="\t", index=False)
df.to_csv(output, sep="\t", index=False)

# Combine all values into a single list
combined_values = []
Expand All @@ -60,7 +60,7 @@ def main(api_url, output_file, counts_output_file):
counts_df[['ontology', 'local_id']] = counts_df['Value'].str.split(':', n=1, expand=True)

# Save counts DataFrame to a TSV file
counts_df.to_csv(counts_output_file, sep="\t", index=False)
counts_df.to_csv(counts_output, sep="\t", index=False)
else:
print("Failed to fetch data from the API:", response.status_code)

Expand Down

0 comments on commit 342a54d

Please sign in to comment.