diff --git a/.gitignore b/.gitignore
index 4d4723f..c73ab23 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,8 @@ examples/outputs
temp/
output/
+__MACOSX/
+
# Distribution / packaging
.Python
build/
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 04e2bb3..aa8767d 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,4 +1,4 @@
-
+
\ No newline at end of file
diff --git a/.idea/sample-annotator.iml b/.idea/sample-annotator.iml
index cae78dc..88ecd1b 100644
--- a/.idea/sample-annotator.iml
+++ b/.idea/sample-annotator.iml
@@ -6,8 +6,10 @@
+
+
-
+
diff --git a/Makefile b/Makefile
index f277f04..a0e4a76 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,10 @@
RUN = poetry run
-biosample_sqlite_file = ~/biosample_basex_data_good_subset.db
+#biosample_sqlite_file = ~/biosample_basex_data_good_subset.db
+# curling from NERSC portal now
+# but may want to change local destination... like a data directory?
+# NOTE: this database file is deleted by `make clean`. Don't make any manual modifications to it!
+biosample_sqlite_file = biosample_basex_data_good_subset.db
.PHONY: test clean all
@@ -28,6 +32,7 @@ clean:
rm -rf examples/outputs/*tsv
rm -rf logs/*log
rm -rf target/*
+ rm -rf $(biosample_sqlite_file)
examples/outputs/report.tsv: examples/gold.json
$(RUN) annotate-sample -R $@ $<
@@ -35,13 +40,13 @@ examples/outputs/report.tsv: examples/gold.json
downloads/mixs6_core.tsv:
curl -L -s 'https://docs.google.com/spreadsheets/d/1QDeeUcDqXes69Y2RjU2aWgOpCVWo5OVsBX9MKmMqi_o/export?format=tsv&gid=178015749' > $@
-examples/outputs/non_attribute_metadata_sel_envs_partial.tsv:
+examples/outputs/non_attribute_metadata_sel_envs_partial.tsv: $(biosample_sqlite_file)
$(RUN) sqlite_client_cli \
--sqlite_path $(biosample_sqlite_file) \
--query "select * from non_attribute_metadata_sel_envs limit 9" \
--tsv_out $@
-rel_to_oxygen_example: downloads/mixs6_core.tsv
+rel_to_oxygen_example: downloads/mixs6_core.tsv $(biosample_sqlite_file)
$(RUN) rel_to_oxygen_example \
--sqlite_path $(biosample_sqlite_file) \
--mixs_core_path $<
@@ -56,4 +61,17 @@ downloads/bibo.owl:
assets/bibo_DocumentStatus.tsv: downloads/bibo.owl bin/robot.jar
java -jar bin/robot.jar query --input $< --query sparql/bibo_DocumentStatus.sparql $@
- sed --in-place=.bak 's/^\?//' $@
\ No newline at end of file
+ sed --in-place=.bak 's/^\?//' $@
+
+# full db at https://portal.nersc.gov/project/m3513/biosample/biosample_basex.db.gz
+# subset has fewer tables
+# fewer rows in XXX (corresponding to samples with repaired env package values of XXX...)
+# see XXX
+# and fewer columns in XXX, highlighting
+downloads/biosample_basex_data_good_subset.db.zip:
+ # --location (-L) pursues redirects
+ curl --location https://portal.nersc.gov/project/m3513/biosample/biosample_basex_data_good_subset.db.zip -o $@
+
+# unzipped file goes into the cwd by default, which would usually be the root of the project
+biosample_basex_data_good_subset.db: downloads/biosample_basex_data_good_subset.db.zip
+ unzip $<
diff --git a/README.md b/README.md
index 825853b..09eb2b7 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,24 @@
# NMDC Sample Annotator API
-## Installing
+## Setup
+1. Requires Python 3.9. (Should we try to make this compatible with an earlier version?)
+2. install [poetry application](https://python-poetry.org/docs/#installation)
+3. `git clone git@github.com:microbiomedata/sample-annotator.git`
+4. `cd sample-annotator`
+5. `poetry install`
+6. `mkdir bin` if it does not already exist. That directory _should_ already be in the repo with a `placeholder.txt` file, or it should be created by the Makefile
+
+Test your setup with `make all`.
+
+## Contributing?
+_Please_
+1. create an issue describing the problem you plan to address or the contribution you intend to make
+2. create a fork of the repo in your own GitHub account, or create a branch here in microbiomedata/sample-annotator. The name of the branch should be brief but include your issue number and a keyword or two from the issue's title
+3. start adding code, probably in the `sample_annotator` subdirectory
+4. save, `git add`, `git commit -m`, and `git push`
+5. create a pull request
+6. ask questions **any** time
## Command Line
diff --git a/assets/bibo_DocumentStatus.tsv b/assets/bibo_DocumentStatus.tsv
deleted file mode 100644
index 912eed8..0000000
--- a/assets/bibo_DocumentStatus.tsv
+++ /dev/null
@@ -1,10 +0,0 @@
-status_label
-"accepted"
-"draft"
-"forthcoming"
-"legal"
-"non peer reviewed"
-"peer reviewed"
-"published"
-"rejected"
-"unpublished"
diff --git a/assets/bibo_DocumentStatus.tsv.bak b/assets/bibo_DocumentStatus.tsv.bak
deleted file mode 100644
index 25a7104..0000000
--- a/assets/bibo_DocumentStatus.tsv.bak
+++ /dev/null
@@ -1,10 +0,0 @@
-?status_label
-"accepted"
-"draft"
-"forthcoming"
-"legal"
-"non peer reviewed"
-"peer reviewed"
-"published"
-"rejected"
-"unpublished"