Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

data-vs-ontology-reports #27

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
src/ontology/nmdco-redundant.ttl
local/
downloads/
**/__pycache__/

.DS_Store
Expand Down
45 changes: 45 additions & 0 deletions data-vs-ontology-reports.Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Command prefix used by the report rules so their tools run via `poetry run`.
RUN=poetry run

# Both names are actions, not files, so make must never treat them as up to date.
.PHONY: data-vs-ontology-all data-vs-ontology-clean


# Top-level target: run the clean step, then build the ENVO ID-ranges report
# and the FMA usage report.
# NOTE(review): data-vs-ontology-clean is an ordinary prerequisite, so it is
# only guaranteed to finish before the report rules when make runs serially;
# with `make -j` it could race with them. Confirm whether parallel builds matter.
data-vs-ontology-all: data-vs-ontology-clean \
data-vs-ontology-reports/envo-id-ranges-report.tsv \
data-vs-ontology-reports/fma-usage-report.tsv

# Remove previously generated reports and downloaded files whose names contain
# "owl", then recreate both directories together with their .gitkeep
# placeholders so the (otherwise empty) directories stay tracked by git.
data-vs-ontology-clean:
rm -rf data-vs-ontology-reports/*
mkdir -p data-vs-ontology-reports
touch data-vs-ontology-reports/.gitkeep
rm -rf downloads/*owl*
mkdir -p downloads
touch downloads/.gitkeep

# Download the ENVO ID-ranges file from the EnvironmentOntology/envo repository.
# The upstream file is named envo-idranges.owl but is saved with an extra .omn
# suffix (presumably so downstream tools treat it as Manchester syntax; confirm).
#
# The ifeq/else/endif is evaluated when make parses this file, so the choice
# between wget and curl is fixed before any recipe runs.
#
# Failure handling: the target is deleted if the download fails, because a
# leftover empty or partial file would otherwise look up to date on the next
# run. curl gets --fail so an HTTP error becomes a non-zero exit instead of an
# error page saved as the target, and --location to follow redirects as wget
# does by default.
downloads/envo-idranges.owl.omn:
	@echo "Downloading..."
	@mkdir -p $(dir $@)
ifeq ($(shell command -v wget 2> /dev/null),)
	@echo "wget is not installed, trying with curl..."
	@curl --fail --location -o $@ https://raw.githubusercontent.com/EnvironmentOntology/envo/master/src/envo/envo-idranges.owl || { rm -f $@; exit 1; }
else
	@echo "Downloading with wget..."
	@wget -O $@ https://raw.githubusercontent.com/EnvironmentOntology/envo/master/src/envo/envo-idranges.owl || { rm -f $@; exit 1; }
endif

# Convert the downloaded ID-ranges file ($<) into $@ with `robot convert`.
# The output format is presumably inferred by robot from the .ttl suffix. TODO confirm.
downloads/envo-idranges.owl.ttl: downloads/envo-idranges.owl.omn
@echo "Converting..."
@robot convert --input $< --output $@

# Build the ENVO ID-ranges report by running the report-id-ranges command
# (through $(RUN)) on the converted Turtle file.
data-vs-ontology-reports/envo-id-ranges-report.tsv: downloads/envo-idranges.owl.ttl
@echo "Generating report..."
$(RUN) report-id-ranges \
--id-ranges-ttl $< \
--output $@

# Run the report-instantiated-traids command (through $(RUN)); the "traids"
# spelling is the command name as invoked here, so it must not be "corrected"
# in this rule alone.
# --output is this rule's target. --counts-output is derived from the target
# name by $(subst ...): for this target it expands to
# data-vs-ontology-reports/biosample-triad-report-counts.tsv.
# No rule names that second file, so make does not track it.
data-vs-ontology-reports/biosample-triad-counts.tsv:
@echo "Generating report..."
$(RUN) report-instantiated-traids \
--output $@ \
--counts-output $(subst counts.tsv,report-counts.tsv,$@)

# Extract every line of the triad report ($<) that mentions an FMA term.
# Only matching lines are kept, so any header line without "FMA:" is dropped.
#
# grep exits with status 1 when no line matches. An empty report is a valid
# result (no FMA usage), so status 1 is accepted; any other non-zero status
# (a real grep error such as an unreadable input) still fails the build.
data-vs-ontology-reports/fma-usage-report.tsv: data-vs-ontology-reports/biosample-triad-counts.tsv
	grep 'FMA:' $< > $@ || [ $$? -eq 1 ]
File renamed without changes.
File renamed without changes.
File renamed without changes.
33 changes: 33 additions & 0 deletions data-vs-ontology-reports/fma-usage-report.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
nmdc:bsm-11-x80t5771 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-1zsths21 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-pqgnra85 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-aa7wmf63 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-vxy31194 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-adv25093 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-xhpt1920 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-afygsz12 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-f0y72191 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-96cg5y52 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-4fnr9v33 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-y16b7q98 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-8vae2844 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-2xw6s444 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-y4310d70 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-e5n81610 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-dcw37a90 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-spjbg416 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-4ppwq475 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-zxjp0468 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-05fh0s26 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-jxa03j76 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-nvz3sq09 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-sbrsd510 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-5aysb633 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-yvnp5859 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-7yq6an11 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-qhewdm80 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-p9t9wv58 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-dzmzp451 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-datvdm83 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-vh4jjb52 ENVO:01001002 FMA:14541 FMA:14541
nmdc:bsm-11-0q1w4832 ENVO:01001002 FMA:14541 FMA:14541
Empty file added downloads/.gitkeep
Empty file.
21 changes: 0 additions & 21 deletions maintenance.Makefile

This file was deleted.

Binary file removed nmdc_ontology/__pycache__/__init__.cpython-310.pyc
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion nmdc_ontology/report_id_ranges.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


@click.command()
@click.option("--id-ranges-ttl", "-i", default="local/envo-idranges.owl.ttl", help="Input file path")
@click.option("--id-ranges-ttl", "-i", default="downloads/envo-idranges.owl.ttl", help="Input file path")
@click.option("--output", "-o", default="envo_id_ranges_report.tsv", help="Output file path")
def generate_id_ranges(id_ranges_ttl, output):
# Load the Turtle content into an RDF graph
Expand Down
10 changes: 5 additions & 5 deletions nmdc_ontology/report_instantiated_traids.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
@click.option('--api-url',
default="https://api.microbiomedata.org/nmdcschema/biosample_set?max_page_size=9999&projection=env_broad_scale%2Cenv_local_scale%2Cenv_medium",
help='URL of the API endpoint')
@click.option('--output-file', default="biosample_triad_report.tsv", help='Output file name')
@click.option('--counts-output-file', default="triad_term_counts.tsv", help='Output file name for value counts')
def main(api_url, output_file, counts_output_file):
@click.option('--output', default="biosample_triad_report.tsv", help='Output file name')
@click.option('--counts-output', default="triad_term_counts.tsv", help='Output file name for value counts')
def main(api_url, output, counts_output):
# Send the GET request
response = requests.get(api_url)

Expand Down Expand Up @@ -39,7 +39,7 @@ def main(api_url, output_file, counts_output_file):
df = pd.DataFrame(rows)

# Save the DataFrame to a TSV file
df.to_csv(output_file, sep="\t", index=False)
df.to_csv(output, sep="\t", index=False)

# Combine all values into a single list
combined_values = []
Expand All @@ -60,7 +60,7 @@ def main(api_url, output_file, counts_output_file):
counts_df[['ontology', 'local_id']] = counts_df['Value'].str.split(':', n=1, expand=True)

# Save counts DataFrame to a TSV file
counts_df.to_csv(counts_output_file, sep="\t", index=False)
counts_df.to_csv(counts_output, sep="\t", index=False)
else:
print("Failed to fetch data from the API:", response.status_code)

Expand Down
Loading