From 7f2e7724eccbbcd1afa3458a39bb6be25dbed725 Mon Sep 17 00:00:00 2001 From: "Mark A. Miller" Date: Tue, 8 Mar 2022 10:17:19 -0500 Subject: [PATCH] #58 --- .gitignore | 2 ++ .idea/misc.xml | 2 +- .idea/sample-annotator.iml | 4 +++- Makefile | 26 ++++++++++++++++++++++---- README.md | 19 ++++++++++++++++++- assets/bibo_DocumentStatus.tsv | 10 ---------- assets/bibo_DocumentStatus.tsv.bak | 10 ---------- 7 files changed, 46 insertions(+), 27 deletions(-) delete mode 100644 assets/bibo_DocumentStatus.tsv delete mode 100644 assets/bibo_DocumentStatus.tsv.bak diff --git a/.gitignore b/.gitignore index 4d4723f..c73ab23 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,8 @@ examples/outputs temp/ output/ +__MACOSX/ + # Distribution / packaging .Python build/ diff --git a/.idea/misc.xml b/.idea/misc.xml index 04e2bb3..aa8767d 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,4 @@ - + \ No newline at end of file diff --git a/.idea/sample-annotator.iml b/.idea/sample-annotator.iml index cae78dc..88ecd1b 100644 --- a/.idea/sample-annotator.iml +++ b/.idea/sample-annotator.iml @@ -6,8 +6,10 @@ + + - + diff --git a/Makefile b/Makefile index f277f04..a0e4a76 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,10 @@ RUN = poetry run -biosample_sqlite_file = ~/biosample_basex_data_good_subset.db +#biosample_sqlite_file = ~/biosample_basex_data_good_subset.db +# curling from NERSC portal now +# but may want to change local destination... like a data directory? +# NOTE: this database file will be deleted by make clean. Don't do any manual modifications in there! 
+biosample_sqlite_file = biosample_basex_data_good_subset.db .PHONY: test clean all @@ -28,6 +32,7 @@ clean: rm -rf examples/outputs/*tsv rm -rf logs/*log rm -rf target/* + rm -rf $(biosample_sqlite_file) examples/outputs/report.tsv: examples/gold.json $(RUN) annotate-sample -R $@ $< @@ -35,13 +40,13 @@ examples/outputs/report.tsv: examples/gold.json downloads/mixs6_core.tsv: curl -L -s 'https://docs.google.com/spreadsheets/d/1QDeeUcDqXes69Y2RjU2aWgOpCVWo5OVsBX9MKmMqi_o/export?format=tsv&gid=178015749' > $@ -examples/outputs/non_attribute_metadata_sel_envs_partial.tsv: +examples/outputs/non_attribute_metadata_sel_envs_partial.tsv: $(biosample_sqlite_file) $(RUN) sqlite_client_cli \ --sqlite_path $(biosample_sqlite_file) \ --query "select * from non_attribute_metadata_sel_envs limit 9" \ --tsv_out $@ -rel_to_oxygen_example: downloads/mixs6_core.tsv +rel_to_oxygen_example: downloads/mixs6_core.tsv $(biosample_sqlite_file) $(RUN) rel_to_oxygen_example \ --sqlite_path $(biosample_sqlite_file) \ --mixs_core_path $< @@ -56,4 +61,17 @@ downloads/bibo.owl: assets/bibo_DocumentStatus.tsv: downloads/bibo.owl bin/robot.jar java -jar bin/robot.jar query --input $< --query sparql/bibo_DocumentStatus.sparql $@ - sed --in-place=.bak 's/^\?//' $@ \ No newline at end of file + sed --in-place=.bak 's/^\?//' $@ + +# full db at https://portal.nersc.gov/project/m3513/biosample/biosample_basex.db.gz +# subset has few tables +# fewer rows in XXX (corresponding to samples with repaired env package values of XXX...) 
+# see XXX +# and fewer columns in XXX, highlighting +downloads/biosample_basex_data_good_subset.db.zip: + # --location (-L) pursues redirects + curl --location https://portal.nersc.gov/project/m3513/biosample/biosample_basex_data_good_subset.db.zip -o $@ + +# unzipped file goes into the cwd by default, which would usually be the root of the project +biosample_basex_data_good_subset.db: downloads/biosample_basex_data_good_subset.db.zip + unzip $< diff --git a/README.md b/README.md index 825853b..09eb2b7 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,24 @@ # NMDC Sample Annotator API -## Installing +## Setup +1. requires python 3.9... should we try to make this compatible with something earlier? +2. install [poetry application](https://python-poetry.org/docs/#installation) +3. `git clone git@github.com:microbiomedata/sample-annotator.git` +4. `cd sample-annotator` +5. `poetry install` +6. `mkdir bin`... that directory _should_ already be in the repo with a placeholder.txt file, or it should be created by the Makefile + +test your setup with `make all` + +## Contributing? +_Please_ +1. create an issue describing the problem you plan to address or the contribution you intend to make +2. create a fork of the repo in your own GitHub account, or create a branch here in microbiomedata/sample-annotator. The name of the branch should be brief but include your issue number and a keyword or two from the issue's title +3. start adding code, probably in the `sample_annotator` subdirectory +4. save, `git add`, `git commit -m`, and `git push` +5. create a pull request +6. 
ask questions **any** time ## Command Line diff --git a/assets/bibo_DocumentStatus.tsv b/assets/bibo_DocumentStatus.tsv deleted file mode 100644 index 912eed8..0000000 --- a/assets/bibo_DocumentStatus.tsv +++ /dev/null @@ -1,10 +0,0 @@ -status_label -"accepted" -"draft" -"forthcoming" -"legal" -"non peer reviewed" -"peer reviewed" -"published" -"rejected" -"unpublished" diff --git a/assets/bibo_DocumentStatus.tsv.bak b/assets/bibo_DocumentStatus.tsv.bak deleted file mode 100644 index 25a7104..0000000 --- a/assets/bibo_DocumentStatus.tsv.bak +++ /dev/null @@ -1,10 +0,0 @@ -?status_label -"accepted" -"draft" -"forthcoming" -"legal" -"non peer reviewed" -"peer reviewed" -"published" -"rejected" -"unpublished"