This repository has been archived by the owner on Apr 19, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Makefile
201 lines (148 loc) · 6.89 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# Default goal: install the pipenv environment, then build generated artefacts.
# clean/install are command-style targets, not files, so they must be .PHONY;
# env.lock is a real stamp file marking that `pipenv install` has completed.
.PHONY: all clean install
all: install build

clean:
	# -f: do not fail if the stamp was never created
	rm -f env.lock

install: env.lock

env.lock:
	pip install pipenv
	pipenv install
	# create/refresh the zero-byte stamp recording a successful install
	cp /dev/null env.lock
# TODO: nmdc-02
# Example documents (in examples/) that are validated against the generated
# JSON Schema via the schema/test-%.valid pattern rule.
schema_test_examples = nmdc_example_database study_test biosample_test gold_project_test emsl_project_test emsl_data_object_test mg_assembly_activities_test mg_assembly_data_objects_test readQC_activities_test readQC_data_objects_test functional-annotation img_mg_annotation_data_objects img_mg_annotation_objects MAGs_activity read_based_analysis_activity metagenome_annotation_activity Froze_Core_2015_S2_0_10_7_Metab gcms_metabolomics_data_products ftms_nom_data_products nom_analysis_activity

# test targets are commands, not files — declare them phony so a file named
# `test` (or similar) can never mask them.
.PHONY: test test_jsonschema pytest
test_jsonschema: $(patsubst %, schema/test-%.valid, $(schema_test_examples))

test: test_jsonschema pytest

# Smoke-test the generated dataclasses by importing/executing the module.
pytest: schema/nmdc.py
	pipenv run python $<
# `build` is an alias (phony): compile the schema to its main artefacts.
.PHONY: build
build: python_dataclasses json_schema shex

# The mixs subschema is not hand-authored; it is compiled
# from a tsv saved from the mixs excel file
schema/mixs.yaml: mixs5/mixs_v5.txt mixs5/mixs_v5e.txt
	scripts/mixs-to-blml.pl $^ > $@

# Generate to $@.tmp first and mv into place so a failed generation never
# leaves a truncated-but-newer target behind.
schema/mixs_meta.schema.json: schema/mixs_meta.yaml
	pipenv run gen-json-schema -t template $< > $@.tmp && mv $@.tmp $@

schema/mixs_meta.py: schema/mixs_meta.yaml
	pipenv run gen-py-classes $< > $@.tmp && mv $@.tmp $@
# -- Generated Artefacts --
#
# the biolinkml framework provides the ability to compile
# the schema to: json-schema, python dataclasses, graphql, ...
#
# These are human-friendly aliases for generated files, so they are phony:
# invoking one simply builds the corresponding schema/ artefact.
.PHONY: all_schema_artefacts python_dataclasses json_schema graphql owl shex schema_uml
all_schema_artefacts: python_dataclasses json_schema graphql schema_uml shex

python_dataclasses: schema/nmdc.py
json_schema: schema/nmdc.schema.json
graphql: schema/nmdc.graphql
owl: schema/nmdc.owl
shex: schema/nmdc.shex
schema_uml: schema/nmdc_schema_uml.png
# Python dataclasses
# Generate, then execute the generated module as a sanity check, then move
# the temp file into place atomically.
schema/%.py: schema/%.yaml env.lock
	pipenv run gen-py-classes $< > $@.tmp && pipenv run python $@.tmp && mv $@.tmp $@

#.PHONY: force_schema_build
#force_schema_build: schema/nmdc.yaml schema/prov.yaml schema/core.yaml schema/annotation.yaml

# JSON Schema
# Generate, self-validate with the jsonschema CLI, then move into place.
schema/nmdc.schema.json: schema/nmdc.yaml env.lock
	pipenv run gen-json-schema -t database $< > $@.tmp && jsonschema $@.tmp && mv $@.tmp $@
# This is temporary fix to apply additionalProperties: false globally
# see: https://github.com/biolink/biolinkml/issues/349
	jq '. += {"additionalProperties": false}' $@ > $@.tmp && mv $@.tmp $@

schema/kbase.schema.json: schema/kbase.yaml env.lock
	pipenv run gen-json-schema -t SESAR $< > $@.tmp && jsonschema $@.tmp && mv $@.tmp $@
# OWL
# After generation, rewrite the biolinkml meta prefix to a schema-specific
# w3id prefix ($* is the pattern stem; perl's s@@@ uses @ as delimiter).
schema/%.owl: schema/%.yaml env.lock
	pipenv run gen-owl $< > $@.tmp && mv $@.tmp $@ && perl -pi -ne 's@prefix meta: <https://w3id.org/biolink/biolinkml/meta/>@prefix meta: <https://w3id.org/$*/>@' $@

# GraphQL
schema/%.graphql: schema/%.yaml env.lock
	pipenv run gen-graphql $< > $@.tmp && mv $@.tmp $@

# ShEx
schema/%.shex: schema/%.yaml env.lock
	pipenv run gen-shex $< > $@.tmp && mv $@.tmp $@

# JSONLD Context
schema/%.context.jsonld: schema/%.yaml env.lock
	pipenv run gen-jsonld-context $< > $@.tmp && mv $@.tmp $@

schema/%.csv: schema/%.yaml env.lock
	pipenv run gen-csv $< > $@.tmp && mv $@.tmp $@

# ProtoBuf
schema/%.proto: schema/%.yaml env.lock
	pipenv run gen-proto $< > $@.tmp && mv $@.tmp $@

#schema/%.rdf: schema/%.yaml env.lock
#	pipenv run gen-rdf $< > $@.tmp && mv $@.tmp $@

# Validate an example instance document against the generated schema.
# NOTE(review): the recipe never creates the .valid file, so these checks
# re-run on every invocation — presumably intentional; confirm before adding
# a `touch $@` stamp.
schema/test-%.valid: examples/%.json schema/nmdc.schema.json
	jsonschema -i $^
# docs/ already exists as a directory; without .PHONY make would consider the
# `docs` target permanently up to date and never regenerate.
.PHONY: docs jekyll-docs
docs: schema/nmdc.yaml env.lock
	pipenv run gen-markdown --dir docs $<

jekyll-docs: schema/nmdc.yaml env.lock
	pipenv run python scripts/jekyllmarkdowngen.py --yaml $< --dir docs

schema/nmdc_schema_uml.png: schema/nmdc.yaml
#	pipenv run python schema/generate_uml.py $< $@
# temporary hack to address issue of generate_uml.py not finding the correct directory
# (no `cd ..` needed: each recipe line runs in its own shell)
	cd schema/ && pipenv run python generate_uml.py $(notdir $<) $(notdir $@)
# -- Mappings --
# Alias for all generated schema-to-schema mapping tables.
.PHONY: all_mappings
all_mappings: mappings/nmdc-to-kbase.tsv

mappings/nmdc-to-%.tsv: schema/%.ttl
	rdfmatch -p nmdc -i mappings/prefixes.ttl -i schema/nmdc.ttl -i $< match > $@

# -- Slides --
# Render markdown slide decks with pandoc in several formats.
docs/%-slides.pdf: docs/%-slides.md
	pandoc $< -t beamer -o $@

docs/%-slides.pptx: docs/%-slides.md
	pandoc $< -o $@

docs/%-slides.html: docs/%-slides.md
	pandoc $< -s -t slidy -o $@
# -- requirements --
.PHONY: requirements-file
# calls pipenv to generate the requirements.txt and requirements-dev.txt files
requirements-file:
	pipenv run pipenv_to_requirements
# -- ETL commands --
# All ETL entry points are commands, not files; test-etl-vars is a debug
# helper that just echoes the derived variables below.
.PHONY: run-etl build-test-datasets build-example-db build-merged-db test-etl-vars

# directories for output and data
etl_build_dir := metadata-translation/src/bin/output
etl_data_dir := metadata-translation/src/data
etl_example_dir := examples

# files produced by etl
etl_db := $(etl_build_dir)/nmdc_database.json
etl_db_zip := $(etl_build_dir)/nmdc_database.json.zip
etl_example_db := $(etl_build_dir)/nmdc_example_database.json
etl_test_sets := study_test.json gold_project_test.json biosample_test.json readQC_data_objects_test.json readQC_activities_test.json mg_assembly_data_objects_test.json mg_assembly_activities_test.json emsl_data_object_test.json emsl_project_test.json

# add directories to test set files
etl_test_set_files := $(foreach set, $(etl_test_sets), $(etl_build_dir)/$(set))

test-etl-vars:
	@echo $(etl_db)
	@echo $(etl_db_zip)
	@echo $(etl_example_db)
	@echo $(etl_test_sets)
	@echo $(etl_test_set_files)
# runs the ETL script, creates the nmdc database and test/example files
run-etl:
	# create needed dirs
	mkdir -p metadata-translation/src/bin/output/nmdc_etl
	# navigate to directory and execute pipeline script
	cd metadata-translation/src/bin/ && python execute_etl_pipeline.py
	# zip output and move to data directory
	rm -f $(etl_db_zip) # remove old copy of zipped db
	zip $(etl_db_zip) $(etl_db) # zip new copy
	cp $(etl_db_zip) $(etl_data_dir) # cp new db to data directory
	# copy example database to examples directory
	cp $(etl_example_db) $(etl_example_dir)
	# copy test datasets to examples
	cp $(etl_test_set_files) $(etl_example_dir)

# runs the ETL script, but ONLY creates the test dataset
build-test-datasets:
	# create needed dirs
	mkdir -p metadata-translation/src/bin/output/nmdc_etl
	# navigate to directory and execute pipeline script
	cd metadata-translation/src/bin/ && python execute_etl_pipeline.py --testdata --no-etl --no-exdb --no-mergedb
	# copy test datasets to examples
	cp $(etl_test_set_files) $(etl_example_dir)

# runs the ETL script, but ONLY creates the example database
build-example-db:
	# create needed dirs
	mkdir -p metadata-translation/src/bin/output/nmdc_etl
	# navigate to directory and execute pipeline script
	cd metadata-translation/src/bin/ && python execute_etl_pipeline.py --exdb --no-testdata --no-etl --no-mergedb
	# copy example database to examples directory
	cp $(etl_example_db) $(etl_example_dir)

# runs the ETL script, but ONLY creates the merged data source used as input for the ETL pipeline
build-merged-db:
	# create needed dirs
	mkdir -p metadata-translation/src/bin/output/nmdc_etl
	# navigate to directory and execute pipeline script
	cd metadata-translation/src/bin/ && python execute_etl_pipeline.py --only-mergedb