diff --git a/docs/assets/git_commit_usecase.svg b/docs/assets/git_commit_usecase.svg
new file mode 100644
index 0000000..0d8a3d9
--- /dev/null
+++ b/docs/assets/git_commit_usecase.svg
@@ -0,0 +1,160 @@
+
diff --git a/docs/assets/git_push_usecase.svg b/docs/assets/git_push_usecase.svg
new file mode 100644
index 0000000..b8ed8b1
--- /dev/null
+++ b/docs/assets/git_push_usecase.svg
@@ -0,0 +1,197 @@
+
diff --git a/docs/assets/menu-filtered.png b/docs/assets/menu-filtered.png
deleted file mode 100644
index 465472b..0000000
Binary files a/docs/assets/menu-filtered.png and /dev/null differ
diff --git a/docs/assets/menu-full.png b/docs/assets/menu-full.png
deleted file mode 100644
index 560adf6..0000000
Binary files a/docs/assets/menu-full.png and /dev/null differ
diff --git a/docs/assets/newyaml_usecase.svg b/docs/assets/newyaml_usecase.svg
new file mode 100644
index 0000000..55c705c
--- /dev/null
+++ b/docs/assets/newyaml_usecase.svg
@@ -0,0 +1,215 @@
+
diff --git a/docs/assets/noexecute_usecase.svg b/docs/assets/noexecute_usecase.svg
new file mode 100644
index 0000000..4209224
--- /dev/null
+++ b/docs/assets/noexecute_usecase.svg
@@ -0,0 +1,250 @@
+
diff --git a/docs/assets/refchef-cook_and_refchef-menu.svg b/docs/assets/refchef-cook_and_refchef-menu.svg
new file mode 100644
index 0000000..cd177f3
--- /dev/null
+++ b/docs/assets/refchef-cook_and_refchef-menu.svg
@@ -0,0 +1,1354 @@
+
+
+
+
+
+
+
+
+
+
+]>
+
diff --git a/docs/assets/refchef-diagram.svg b/docs/assets/refchef-diagram.svg
deleted file mode 100644
index dc7b3a9..0000000
--- a/docs/assets/refchef-diagram.svg
+++ /dev/null
@@ -1 +0,0 @@
-
\ No newline at end of file
diff --git a/docs/assets/refchef-serve.png b/docs/assets/refchef-serve.png
index e21170f..199005f 100644
Binary files a/docs/assets/refchef-serve.png and b/docs/assets/refchef-serve.png differ
diff --git a/docs/assets/refchef_overview.svg b/docs/assets/refchef_overview.svg
new file mode 100644
index 0000000..0d7f6a1
--- /dev/null
+++ b/docs/assets/refchef_overview.svg
@@ -0,0 +1,732 @@
+
+
+
+
+
+
+
+
+
+
+]>
+
diff --git a/docs/assets/refchefmenu_usecase.svg b/docs/assets/refchefmenu_usecase.svg
new file mode 100644
index 0000000..94e02a2
--- /dev/null
+++ b/docs/assets/refchefmenu_usecase.svg
@@ -0,0 +1,174 @@
+
diff --git a/docs/folders.md b/docs/folders.md
new file mode 100644
index 0000000..56dd794
--- /dev/null
+++ b/docs/folders.md
@@ -0,0 +1,68 @@
+
+RefChef creates folders to store your references. The names of these folders are based on:
+
+1. The [`master.yaml`](./specs.md#master.yaml) key (which should match the 'name' entry under 'metadata' in `master.yaml`).
+
+2. The 'component' entry under 'levels' in [`master.yaml`](./specs.md#master.yaml).
+
+ Here is the collapsed file tree that refchef created from the Tutorial part of the documentation and what the directory names are based on:
+
+```bash
+./Users/jwalla12/references #this directory is specified in refchef-cook or the config files
+└── S_cerevisiae #this is named after the 'key' and the 'name' entry under 'metadata' in master.yaml
+    ├── bowtie2_index #this folder is created in the master.yaml `commands` section.
+    ├── bwa_index #this folder is created in the master.yaml `commands` section.
+    ├── gtf #this folder is created in the master.yaml `commands` section.
+    └── primary #this is named after the 'component' entry under 'levels' in master.yaml
+```
+
+Here is the expanded file tree:
+
+```bash
+./Users/jwalla12/references
+└── S_cerevisiae
+    ├── bowtie2_index
+    │   └── metadata.txt
+    ├── bwa_index
+    │   └── metadata.txt
+    ├── gtf
+    │   ├── CHECKSUMS
+    │   ├── Saccharomyces_cerevisiae.R64-1-1.87.gtf
+    │   ├── final_checksums.md5
+    │   ├── metadata.txt
+    │   └── postdownload-checksums.md5
+    └── primary
+        ├── CHECKSUMS
+        ├── Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
+        ├── bowtie2_index -> /Users/jwalla12/references/S_cerevisiae/bowtie2_index
+        ├── bwa_index -> /Users/jwalla12/references/S_cerevisiae/bwa_index
+        ├── final_checksums.md5
+        ├── metadata.txt
+        └── postdownload-checksums.md5
+```
+This indicates that refchef has created symlinked directories for bowtie2 and bwa indices in `/Users/jwalla12/references/S_cerevisiae/primary`. This process (linking reference and index) is triggered by:
+1. The addition of the `src:` line in bowtie2.yaml and bwa.yaml
+2. Specifying `indices:` as the entry under `levels` in the master.yaml
+
+If we look at the output from [`refchef-menu`](./usage.md#refchef-menu), we see the UUID for the primary reference file, which is `dff337a6-9a1d-3313-8ced-dc6f3bfc9689`.
+
+```bash
+┌ 🍶 RefChef Menu ────────────────────────┬───────────┬───────────────────────────────────────────┬──────────────────────────────────────┐
+│ name         │ organism                 │ component │ description                               │ uuid                                 │
+├──────────────┼──────────────────────────┼───────────┼───────────────────────────────────────────┼──────────────────────────────────────┤
+│ S_cerevisiae │ Saccharomyces cerevisiae │ primary   │ corresponds to genbank id GCA_000146045.2 │ dff337a6-9a1d-3313-8ced-dc6f3bfc9689 │
+└──────────────┴──────────────────────────┴───────────┴───────────────────────────────────────────┴──────────────────────────────────────┘
+```
+In this clipping from bowtie2.yaml, note that the UUID was indicated in the `src:` entry under `component`, `indices`, and `levels`.
+
+```yaml
+S_cerevisiae:
+ levels:
+ indices:
+ - component: bowtie2_index
+ complete:
+ status: false
+ src: dff337a6-9a1d-3313-8ced-dc6f3bfc9689
+```
+
+This indicates which primary reference was used to create the index file.
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
index c3e7bad..474b235 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -8,5 +8,12 @@
---
+`RefChef` is a reference management system that includes additional tools to record the provenance of reference sequences, indices, and annotations. It was created to enable reproducible research.
-RefChef is a reference management tool used to: (1) document the exact steps undertaken in the retrieval of genomic references; (2) maintain the associated metadata; (3) provide a mechanism for automatically reproducing retrieval and creation of an exact copy of genomic references.
+`RefChef` will:
+
+1. Document the exact steps undertaken in the retrieval and processing of genomic references
+2. Maintain the associated metadata
+3. Provide a mechanism for automatically reproducing retrieval and creation of an exact copy of genomic references
+
+![Diagram](assets/refchef_overview.svg)
diff --git a/docs/inputs.md b/docs/inputs.md
new file mode 100644
index 0000000..a44045d
--- /dev/null
+++ b/docs/inputs.md
@@ -0,0 +1,112 @@
+---
+
+### **master.yaml**
+
+**overview**
+Refchef uses YAML files that are composed of nested entry and value pairs -- for example, the entry and value pair `common_name`: `yeast`. The spacing and indentation of the entries and values are meaningful - Refchef uses the convention of using 2 spaces to indent each subsequent level of the entries and values in the YAML and a `:` and space are between each entry and value. Some entries in the yaml will have a preceding `-` and a space before them (such as `- component:` and the commands under the `commands` header), which are required for Refchef to properly process the YAML.
+
+See the [`master.yaml` file specifications](./specs.md#master.yaml) for more information.
+
+Example `master.yaml` before processing:
+```yaml
+S_cerevisiae:
+ metadata:
+ name: S_cerevisiae
+ common_name: yeast
+ ncbi_taxon_id: 4932
+ organism: Saccharomyces cerevisiae
+ organization: ensembl
+ custom: no
+ description: corresponds to genbank id GCA_000146045.2
+ downloader: joselynn wallace
+ ensembl_release_number: 87
+ accession:
+ genbank:
+ refseq:
+ levels:
+ references:
+ - component: primary
+ complete:
+ status: false
+ commands:
+ - wget ftp://ftp.ensembl.org/pub/release-87/fasta/saccharomyces_cerevisiae/dna/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa.gz
+ - wget ftp://ftp.ensembl.org/pub/release-87/fasta/saccharomyces_cerevisiae/dna/CHECKSUMS
+ - md5 *.gz > postdownload-checksums.md5
+ - gunzip *.gz
+ - md5 *.* > final_checksums.md5
+```
+
+The string of text entered in the `key` field (`S_cerevisiae` in the above example) will be used to create a folder inside the directory you specify as your output in your config file (`cfg.ini` or `cfg.yaml`) or `refchef-cook` arguments. In the previous quickstart example, we used `/Users/jwalla12/references` as the output directory for `refchef-cook`. Here is the collapsed file tree that refchef created, note that the folder containing the primary reference is nested inside a folder named `S_cerevisiae` based on the `key`.
+
+```bash
+./Users/jwalla12/references #this directory is specified in refchef-cook or the config files
+└── S_cerevisiae
+    ├── bowtie2_index
+    ├── bwa_index
+    ├── gtf
+    └── primary
+
+```
+
+**master.yaml metadata**
+The `metadata` section of `master.yaml` contains information about the references, including the organism name, taxon_id, etc.
+
+!!! Caution
+ When running a new YAML file to add additional information to a primary reference, metadata entries present in the initial [`master.yaml`](#master.yaml) file can be omitted (for example, `ncbi_taxon_id:`, `common_name:`). When adding indices or annotations to a primary reference already in [`master.yaml`](#master.yaml), the metadata in [`master.yaml`](#master.yaml) will be overwritten by the metadata in the new.yaml file. This could be helpful in situations where you want to update the metadata fields.
+
+**master.yaml levels**
+The `levels` section contains higher level information about the references, including when they were downloaded and the exact commands used to download and process the references.
+
+!!! Caution
+ The entry `status` must be set to `false` for Refchef to execute the commands in the code block. If it is set to `true`, the code will not execute (even if the -e flag is set). After a code block is executed, the `false` flag will flip to `true` automatically and the `time:` entry will appear under the `status` header. The `time:` header will be populated with the datetime stamp the reference was downloaded.
+
+**master.yaml commands**
+This portion of the `master.yaml` should be populated with the specific commands you want to execute to download and process your reference. Each command should be prepended with a `-` and a space.
+
+!!! Caution
+ Each time files are processed using a set of commands in the YAML, the last command must run `md5` on all of the files and direct the output to a file called `final_checksums.md5`.
+
+---
+
+### **cfg.yaml**
+**overview**
+Refchef requires configuration information, which can be passed as arguments or specified in a configuration file. A `cfg.yaml` is one option for configuration and should contain the following fields. Also indicated below are whether each field is required, the expected format of each field, and a brief description of its contents.
+
+
+See the [`cfg.yaml` file specifications](./specs.md#cfg.yaml) for more information.
+
+**example:**
+```yaml
+config-yaml:
+ path-settings:
+ reference-directory: /Users/jwalla12/references
+ git-directory: /Users/jwalla12/remote_references
+ remote-repository: jrwallace/remote_references
+ log-settings:
+ log: 'yes'
+```
+
+
+---
+
+### **cfg.ini**
+**overview**
+Refchef requires configuration information, which can be passed as arguments or specified in a configuration file. A `cfg.ini` is one option for configuration and should contain the following fields. Also indicated below are whether each field is required, the expected format of each field, and a brief description of its contents.
+
+See the [`cfg.ini` file specifications](./specs.md#cfg.ini) for more information.
+
+**example:**
+
+```ini
+[path-settings]
+reference-directory=/Users/jwalla12/references
+git-directory=/Users/jwalla12/remote_references
+remote-repository=jrwallace/remote_references
+[log-settings]
+log=yes
+[runtime-settings]
+break-on-error=yes
+verbose=yes
+```
+
+
diff --git a/docs/installation.md b/docs/installation.md
index 14a70ff..b3150fc 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -1,10 +1,48 @@
+### Install RefChef
+
To install from PyPI using **pip**:
`pip install refchef`
To install using **Anaconda Python**:
`conda install -c compbiocore refchef`
+### Set up Git and GitHub
+RefChef uses Git repositories for version control of the `master.yaml` file, which contains a list of all the references on the system and their provenance. You can also use GitHub to remotely host your repositories, but this is optional.
+
+Before using RefChef, set up [git](https://help.github.com/en/articles/set-up-git).
+
+If you want to use GitHub to host your repositories, create a GitHub account and set up an [access token](https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line).
+![](assets/github_token.png)
+
+Additionally, create a [`.gitignore` file](https://help.github.com/en/articles/ignoring-files)...
+
+```bash
+touch .gitignore
+```
+
+...and add `.env` to the `.gitignore` by pasting the following into the `.gitignore` file.
+```bash
+# ignore env files
+*.env
+```
+
+Now create a `.env` file...
+```bash
+touch .env
+```
+
+... and paste the contents of the `.env.template` file in the `RefChef` home directory into the `.env` file, which will now look like this:
+
+```bash
+GITHUB_TOKEN=
+```
+
+Then, paste the GitHub access token into the `GITHUB_TOKEN=` line copied over from the `.env.template` file. For example, your `.env` file might now look like this:
+
+```bash
+GITHUB_TOKEN=5c25370fcf7db4a676d98d72700e2922654485ed
+```
### Development
To install a **development version** from the current directory:
```bash
@@ -16,14 +54,6 @@ pip install -e .
Run unit tests as:
`python setup.py test`
-### Set up `.env` file with GitHub Access Token
-Sensitive environment variables are stored in the .env file. This file is included in .gitignore intentionally, so that it is never committed.
-- Create a `.env` file and copy into it the contents of `.env.template`
-- Get your [GitHub Access Token](https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/) and add to the `.env` file.
-- Make sure to add the GH_TOKEN variable to the environment of the CI provider you use.
-
-![](assets/github_token.png)
-
## Contributing
Contributions consistent with the style and quality of existing code are
@@ -33,7 +63,6 @@ Check the issues page of this repository for available work.
### Committing
-
This project uses [commitizen](https://pypi.org/project/commitizen/)
to ensure that commit messages remain well-formatted and consistent
across different contributors.
@@ -47,17 +76,17 @@ pip install commitizen
```
To start work on a new change, pull the latest `develop` and create a
-new *topic branch* (e.g. feature-resume-model`,
+new *topic branch* (e.g. `feature-resume-model`,
`chore-test-update`, `bugfix-bad-bug`).
+Add your changes to the current branch.
```bash
git add .
```
-To commit, run the following command (instead of ``git commit``) and
+To commit your changes, run the following command (instead of `git commit`) and
follow the directions:
-
```bash
cz commit
```
diff --git a/docs/overview.md b/docs/overview.md
new file mode 100644
index 0000000..e98aa35
--- /dev/null
+++ b/docs/overview.md
@@ -0,0 +1,26 @@
+**RefChef comes with two commands:**
+
+[**`refchef-cook`**](./usage.md#refchef-cook):
+Will read recipes and execute the commands that will retrieve the references, indices, or annotations based on the contents of [`master.yaml`](./inputs.md#master.yaml).
+
+[**`refchef-menu`**](./usage.md#refchef-menu):
+Provides a way for the user to list all references present in the system, based on [`master.yaml`](./inputs.md#master.yaml), as well as filter the list of references based on metadata options.
+
+![Diagram](assets/refchef-cook_and_refchef-menu.svg)
+
+**RefChef requires a [`master.yaml`](./inputs.md#master.yaml) file:**
+
+In addition to the [`refchef-cook`](./usage.md#refchef-cook) and [`refchef-menu`](./usage.md#refchef-menu) commands, RefChef requires a [`master.yaml`](./inputs.md#master.yaml) containing a list of references, indices, annotations, and metadata, as well as the commands necessary to download and process the files.
+When [`refchef-cook`](./usage.md#refchef-cook) is executed, RefChef will update the [`master.yaml`](./inputs.md#master.yaml) to change the `complete` option from `false` to `true` and will also add a `uuid` for each reference, the date the files were downloaded and their location, as well as a complete list of files downloaded.
+Based on the arguments you pass to [`refchef-cook`](./usage.md#refchef-cook), it will either commit those changes to [`master.yaml`](./inputs.md#master.yaml) to a local repository or commit and push the changes to a remote repository.
+
+**RefChef requires configuration information:**
+
+[`refchef-cook`](./usage.md#refchef-cook) and [`refchef-menu`](./usage.md#refchef-menu) both require some configuration information, including:
+
+1. Where you'd like the references to be saved
+2. The local git repository for version control of references
+3. The remote github repository for version control of reference
+ sequences (optional).
+
+This information can be specified in a [`cfg.yaml`](./inputs.md#cfg.yaml) file, a [`cfg.ini`](./inputs.md#cfg.ini) file, or it can be passed as arguments to [`refchef-cook`](./usage.md#refchef-cook).
\ No newline at end of file
diff --git a/docs/quickstart.md b/docs/quickstart.md
new file mode 100644
index 0000000..1de46ce
--- /dev/null
+++ b/docs/quickstart.md
@@ -0,0 +1,255 @@
+This quickstart assumes that [bwa](http://bio-bwa.sourceforge.net/) and [bowtie2](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml) are installed and in your current path.
+
+Create a [remote repository](https://help.github.com/en/articles/creating-a-new-repository) and [clone it](https://help.github.com/en/articles/cloning-a-repository).
+
+Create a directory for refchef to save your references.
+
+Create a [`master.yaml`](./inputs.md#master.yaml) file and save it in your local git repository directory. Here is a [`master.yaml`](./inputs.md#master.yaml) file that will download a yeast genome from Ensembl:
+
+```yaml
+S_cerevisiae:
+ metadata:
+ name: S_cerevisiae
+ common_name: yeast
+ ncbi_taxon_id: 4932
+ organism: Saccharomyces cerevisiae
+ organization: ensembl
+ custom: no
+ description: corresponds to genbank id GCA_000146045.2
+ downloader: joselynn wallace
+ ensembl_release_number: 87
+ accession:
+ genbank:
+ refseq:
+ levels:
+ references:
+ - component: primary
+ complete:
+ status: false
+ commands:
+ - wget ftp://ftp.ensembl.org/pub/release-87/fasta/saccharomyces_cerevisiae/dna/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa.gz
+ - wget ftp://ftp.ensembl.org/pub/release-87/fasta/saccharomyces_cerevisiae/dna/CHECKSUMS
+ - md5 *.gz > postdownload-checksums.md5
+ - gunzip *.gz
+ - md5 *.* > final_checksums.md5
+```
+Pass the configuration arguments in a config file or directly to [`refchef-cook`](./usage.md#refchef-cook) (as seen in the following example):
+
+```
+refchef-cook -e -o /Users/jwalla12/references -gl /Users/jwalla12/remote_references -gr jrwallace/remote_references --git commit -l
+```
+
+After [`refchef-cook`](./usage.md#refchef-cook) is run, [`master.yaml`](./inputs.md#master.yaml) will reflect that you have downloaded the reference and it will now look like this:
+
+```yaml
+S_cerevisiae:
+ metadata:
+ name: S_cerevisiae
+ common_name: yeast
+ ncbi_taxon_id: 4932
+ organism: Saccharomyces cerevisiae
+ organization: ensembl
+ custom: false
+ description: corresponds to genbank id GCA_000146045.2
+ downloader: joselynn wallace
+ ensembl_release_number: 87
+ accession:
+ genbank: null
+ refseq: null
+ levels:
+ references:
+ - component: primary
+ complete:
+ status: true
+ time: '2019-07-25 09:08:37.478553'
+ commands:
+ - wget ftp://ftp.ensembl.org/pub/release-87/fasta/saccharomyces_cerevisiae/dna/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa.gz
+ - wget ftp://ftp.ensembl.org/pub/release-87/fasta/saccharomyces_cerevisiae/dna/CHECKSUMS
+ - md5 *.gz > postdownload-checksums.md5
+ - gunzip *.gz
+ - md5 *.* > final_checksums.md5
+ location: /Users/jwalla12/references/S_cerevisiae/primary
+ files:
+ - metadata.txt
+ - postdownload-checksums.md5
+ - CHECKSUMS
+ - final_checksums.md5
+ - Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
+ uuid: dff337a6-9a1d-3313-8ced-dc6f3bfc9689
+```
+
+Make another .yaml file to create a bowtie2 index of this genome, call the file `bowtie2.yaml`.
+
+```yaml
+S_cerevisiae:
+ levels:
+ indices:
+ - component: bowtie2_index
+ complete:
+ status: false
+ src: dff337a6-9a1d-3313-8ced-dc6f3bfc9689
+ commands:
+ - mkdir /Users/jwalla12/references/S_cerevisiae/bowtie2_index
+ - cd /Users/jwalla12/references/S_cerevisiae/bowtie2_index
+ - ln -s /Users/jwalla12/references/S_cerevisiae/primary/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa ./Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
+ - bowtie2-build Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa S_cerevisiae
+ - md5 ./*.* > ./final_checksums.md5
+```
+
+Then use [`refchef-cook`](./usage.md#refchef-cook) and specify the new yaml to add to [`master.yaml`](./inputs.md#master.yaml).
+
+```
+refchef-cook -e -o /Users/jwalla12/references -gl /Users/jwalla12/remote_references -gr jrwallace/remote_references -n /Users/jwalla12/remote_references/bowtie2.yaml -g commit -l
+```
+
+Make another .yaml file to create a bwa index of this genome, call the file `bwa.yaml`.
+
+```yaml
+S_cerevisiae:
+ levels:
+ indices:
+ - component: bwa_index
+ complete:
+ status: false
+ src: dff337a6-9a1d-3313-8ced-dc6f3bfc9689
+ commands:
+ - mkdir /Users/jwalla12/references/S_cerevisiae/bwa_index
+ - cd /Users/jwalla12/references/S_cerevisiae/bwa_index
+ - ln -s /Users/jwalla12/references/S_cerevisiae/primary/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa ./Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
+ - bwa index Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa -p S_cerevisiae
+ - md5 ./*.* > ./final_checksums.md5
+```
+
+Then use [`refchef-cook`](./usage.md#refchef-cook) and specify the new yaml to add to [`master.yaml`](./inputs.md#master.yaml).
+
+```
+refchef-cook -e -o /Users/jwalla12/references -gl /Users/jwalla12/remote_references -gr jrwallace/remote_references -n /Users/jwalla12/remote_references/bwa.yaml -g commit -l
+```
+
+We can also track annotation files for the reference genome. Make the following .yaml file and call it `gtf.yaml`:
+
+```yaml
+S_cerevisiae:
+ levels:
+ annotations:
+ - component: gtf
+ complete:
+ status: false
+ commands:
+ - wget ftp://ftp.ensembl.org/pub/release-87/gtf/saccharomyces_cerevisiae/Saccharomyces_cerevisiae.R64-1-1.87.gtf.gz
+ - wget ftp://ftp.ensembl.org/pub/release-87/gtf/saccharomyces_cerevisiae/CHECKSUMS
+ - md5 *.gz > postdownload-checksums.md5
+ - gunzip *.gz
+ - md5 *.* > final_checksums.md5
+```
+
+Then use [`refchef-cook`](./usage.md#refchef-cook) and specify the new yaml to add to [`master.yaml`](./inputs.md#master.yaml).
+
+```
+refchef-cook -e -o /Users/jwalla12/references -gl /Users/jwalla12/remote_references -gr jrwallace/remote_references -n /Users/jwalla12/remote_references/gtf.yaml -g commit -l
+```
+We can see what references are available using [`refchef-menu`](./usage.md#refchef-menu):
+```
+refchef-menu -f /Users/jwalla12/remote_references/master.yaml
+```
+```
+┌ 🍶 RefChef Menu ────────────────────────┬───────────────┬───────────────────────────────────────────┬──────────────────────────────────────┐
+│ name         │ organism                 │ component     │ description                               │ uuid                                 │
+├──────────────┼──────────────────────────┼───────────────┼───────────────────────────────────────────┼──────────────────────────────────────┤
+│ S_cerevisiae │ Saccharomyces cerevisiae │ gtf           │ corresponds to genbank id GCA_000146045.2 │ 5f7ae94c-2e51-3cc6-bcbf-6e251c75ef2f │
+│ S_cerevisiae │ Saccharomyces cerevisiae │ bowtie2_index │ corresponds to genbank id GCA_000146045.2 │ 93393699-cb40-3ad7-ac07-ae4bdb1efd3e │
+│ S_cerevisiae │ Saccharomyces cerevisiae │ bwa_index     │ corresponds to genbank id GCA_000146045.2 │ dff337a6-9a1d-3313-8ced-dc6f3bfc9689 │
+│ S_cerevisiae │ Saccharomyces cerevisiae │ primary       │ corresponds to genbank id GCA_000146045.2 │ dff337a6-9a1d-3313-8ced-dc6f3bfc9689 │
+└──────────────┴──────────────────────────┴───────────────┴───────────────────────────────────────────┴──────────────────────────────────────┘
+```
+We can also get this information if we look at [`master.yaml`](./inputs.md#master.yaml):
+```yaml
+S_cerevisiae:
+ metadata:
+ name: S_cerevisiae
+ common_name: yeast
+ ncbi_taxon_id: 4932
+ organism: Saccharomyces cerevisiae
+ organization: ensembl
+ custom: false
+ description: corresponds to genbank id GCA_000146045.2
+ downloader: joselynn wallace
+ ensembl_release_number: 87
+ accession:
+ genbank: null
+ refseq: null
+ levels:
+ references:
+ - component: primary
+ complete:
+ status: true
+ time: '2019-07-25 16:26:42.700668'
+ commands:
+ - wget ftp://ftp.ensembl.org/pub/release-87/fasta/saccharomyces_cerevisiae/dna/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa.gz
+ - wget ftp://ftp.ensembl.org/pub/release-87/fasta/saccharomyces_cerevisiae/dna/CHECKSUMS
+ - md5 *.gz > postdownload-checksums.md5
+ - gunzip *.gz
+ - md5 *.* > final_checksums.md5
+ location: /Users/jwalla12/references/S_cerevisiae/primary
+ files:
+ - metadata.txt
+ - postdownload-checksums.md5
+ - CHECKSUMS
+ - final_checksums.md5
+ - Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
+ uuid: dff337a6-9a1d-3313-8ced-dc6f3bfc9689
+ indices:
+ - component: bowtie2_index
+ complete:
+ status: true
+ time: '2019-07-25 16:26:43.971349'
+ src: dff337a6-9a1d-3313-8ced-dc6f3bfc9689
+ commands:
+ - mkdir /Users/jwalla12/references/yeast_refs/bowtie2_index
+ - cd /Users/jwalla12/references/yeast_refs/bowtie2_index
+ - ln -s /Users/jwalla12/references/yeast_refs/primary/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
+ /Users/jwalla12/references/yeast_refs/bowtie2_index/
+ - bowtie2-build /Users/jwalla12/references/yeast_refs/bowtie2_index/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
+ S_cerevisiae
+ - md5 /Users/jwalla12/references/yeast_refs/bowtie2_index/*.* > /Users/jwalla12/references/yeast_refs/bowtie2_index/final_checksums.md5
+ location: /Users/jwalla12/references/S_cerevisiae/bowtie2_index
+ files:
+ - metadata.txt
+ uuid: 84928c3e-af1a-11e9-a45e-8c8590bd206d
+ - component: bwa_index
+ complete:
+ status: true
+ time: '2019-07-25 16:26:45.183284'
+ src: dff337a6-9a1d-3313-8ced-dc6f3bfc9689
+ commands:
+ - mkdir /Users/jwalla12/references/yeast_refs/bwa_index
+ - cd /Users/jwalla12/references/yeast_refs/bwa_index
+ - ln -s /Users/jwalla12/references/yeast_refs/primary/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
+ /Users/jwalla12/references/yeast_refs/bwa_index/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
+ - bwa index /Users/jwalla12/references/yeast_refs/bwa_index/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
+ > /Users/jwalla12/references/yeast_refs/bwa_index/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
+ - md5 /Users/jwalla12/references/yeast_refs/bwa_index/*.* > /Users/jwalla12/references/yeast_refs/bwa_index/final_checksums.md5
+ location: /Users/jwalla12/references/S_cerevisiae/bwa_index
+ files:
+ - metadata.txt
+ uuid: 854b7780-af1a-11e9-a9f8-8c8590bd206d
+ annotations:
+ - component: gtf
+ complete:
+ status: true
+ time: '2019-07-25 16:26:54.326082'
+ commands:
+ - wget ftp://ftp.ensembl.org/pub/release-87/gtf/saccharomyces_cerevisiae/Saccharomyces_cerevisiae.R64-1-1.87.gtf.gz
+ - wget ftp://ftp.ensembl.org/pub/release-87/gtf/saccharomyces_cerevisiae/CHECKSUMS
+ - md5 *.gz > postdownload-checksums.md5
+ - gunzip *.gz
+ - md5 *.* > final_checksums.md5
+ location: /Users/jwalla12/references/S_cerevisiae/gtf
+ files:
+ - metadata.txt
+ - postdownload-checksums.md5
+ - Saccharomyces_cerevisiae.R64-1-1.87.gtf
+ - CHECKSUMS
+ - final_checksums.md5
+ uuid: 5f7ae94c-2e51-3cc6-bcbf-6e251c75ef2f
+```
\ No newline at end of file
diff --git a/docs/specs.md b/docs/specs.md
index 20f63aa..b2e2b58 100644
--- a/docs/specs.md
+++ b/docs/specs.md
@@ -1,71 +1,128 @@
-# Specifications for `master.yaml`
+### `master.yaml`
+
+The [`master.yaml`](./inputs.md#master.yaml) file is the main source of information that RefChef uses to retrieve references, indices, and annotations. It is composed of sequences of code blocks that correspond to each reference. Each code block in [`master.yaml`](./inputs.md#master.yaml) starts with a `key`, followed by `metadata` and `levels`.
+
+See the [`master.yaml` overview and usage](./inputs.md#master.yaml) for more information.
+
---
-```yaml
-reference_test1:
- metadata:
- name: reference_test1
- species: mouse
- organization: ucsc
- downloader: fgelin
- levels:
- references:
- - component: primary
- complete:
- status: false
- commands:
- - wget -nv https://s3.us-east-2.amazonaws.com/refchef-tests/chr1.fa.gz
- - md5 *.fa.gz > postdownload_checksums.md5
- - gunzip *.gz
- - md5 *.fa > final_checksums.md5
-```
-
-The `master.yaml` file is the main source of information that RefChef uses to retrieve references, indices, and annotations.
-
-### Specifications
+
+The `key` section consists of:
+
+`<key>:`
+Expected format: String, where `<key>` is the name of the reference.
+
---
-Each block has a key with the name of the reference, index, or annotation.
+The `metadata` section consists of:
+
+>`metadata.name`
+>Expected format: string, should be the same as the block's `key`
+
+>`metadata.common_name`
+>Expected format: string
+
+>`metadata.ncbi_taxon_id`
+>Expected format: integer, based on [NCBI taxon ID](https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi)
+
+>`metadata.organism`
+>Expected format: string
+
+>`metadata.organization`
+>Expected format: string
+
+>`metadata.custom`
+>Expected format: string
+
+>`metadata.description`
+>Expected format: string
+
+>`metadata.downloader`
+>Expected format: string
+
+>`metadata.ensembl_release_number`
+>Expected format: integer
+
+>>`metadata.accession.genbank`
+>>Expected format: string
+
+>>`metadata.accession.refseq`
+>>Expected format: string
+
+---
-`reference_name.metadata`
-Expected format: key - value mapping
+The `levels` section consists of:
-`reference_name.metadata.name`
-Expected format: string, should be the same as the block's key
+>`levels.<type>`
+>Where `<type>`: `references`, `annotations`, or `indices`
+
+>>`levels.<type>.- component`
+>>Expected format: string
+
+>>>`levels.<type>.complete.status`
+>>>Expected format: boolean (note that if `complete.status` is set to `true` RefChef will skip the current block and not retrieve any file. RefChef automatically changes the status to `true` after retrieving files for the first time.)
+
+>>`levels.<type>.src`
+ Expected format: UUID string from existing reference, when adding an index file for a reference RefChef will create a symlink to the index files in the reference folder.
+
+>>`levels.<type>.commands`
+ Expected format: Each command should start with `- `, this section is a list of commands to download and process each reference.
+
+After [`refchef-cook`](./usage.md#refchef-cook) is run and references are downloaded, `levels.<type>.complete.status: false` will change to `levels.<type>.complete.status: true` and the following fields will be added to `master.yaml`:
+
+>>>`levels.<type>.complete.time`
+>>>Expected format: RefChef will autopopulate this field with the date and time stamp the reference was downloaded if `levels.<type>.complete.status: true`
+
+>>`levels.<type>.location`
+ Expected format: Refchef will autopopulate this field with the directory where downloaded files are stored if `levels.<type>.complete.status: true`
+
+>>`levels.<type>.files`
+ Expected format: Refchef will autopopulate this field with a list of files that were downloaded if `levels.<type>.complete.status: true`
+
+>>`levels.<type>.uuid`
+ Expected format: Refchef will autopopulate this field with a UUID for your reference file if `levels.<type>.complete.status: true`
+---
+
+### `cfg.yaml`
+
+If using a `cfg.yaml` file, the `cfg.yaml` file should follow the following specs:
+
+>>`config-yaml.path-settings.reference-directory`
+Expected format: String, path to reference storage directory
+
+>>`config-yaml.path-settings.git-directory`
+Expected format: String, path to local git repository
+
+>>`config-yaml.path-settings.remote-repository`
+Expected format: String, remote git repository, should be in the format of `user/repo`
+
+>>`config-yaml.log-settings.log`
+Expected format: String, should be either 'yes' or 'no' in single quotes, indicating whether or not log files will be made
+
+Also see the [`cfg.yaml` overview and example.](./inputs.md#cfg.yaml)
+
+---
+### `cfg.ini`
-`reference_name.metadata.species`
-Expected format: string
+If using a `cfg.ini` file, the `cfg.ini` file should follow the following specs:
-`reference_name.metadata.organization`
-Expected format: string
+`[path-settings].reference-directory=`
+Expected format: String, path to reference storage directory
-`reference_name.metadata.downloader`
-Expected format: string
+`[path-settings].git-directory=`
+Expected format: String, path to local git repository
-`reference_name.levels`
-Expected format: key - value mapping
+`[path-settings].remote-repository=`
+Expected format: String, remote git repository, should be in the format of `user/repo`
-`reference_name.levels.`
-Where : `references`, `annotations`, or `indices`
-Expected format: list of key - value mappings
+`[log-settings].log=`
+Expected format: String, should be either 'yes' or 'no', indicating whether or not log files will be made
-> `reference_name.levels..-`
+`[runtime-settings].break-on-error=`
+Expected format: String, should be either 'yes' or 'no', indicating how RefChef should respond when encountering an error
-> `component`
-Expected format: string
-`complete.status`
-Expected formate: boolean (note that if `complete.status` is set to `true` RefChef will skip the current block and not retrieve any file. RefChef automatically changes the status to true after retrieving files for the first time.)
-`src`
-Expected format: UUID v4, or string. If a UUID of an existing reference is entered, RefChef will create a symlink to the index files from the reference folder.
-`commands`
-Expected format: list of strings
+`[runtime-settings].verbose=`
+Expected format: String, should be either 'yes' or 'no', toggles between verbosity output settings
-After RefChef runs and retrieves the files, the following fields will be appended the following fields to `master.yaml`:
+Also see the [`cfg.ini` overview and example.](./inputs.md#cfg.ini)
->`reference_name.levels..-`
-> `location`
-Expected format: string
-`files`
-Expected format: list of strings
-`uuid`
-Expected format: UUID v4
diff --git a/docs/tutorials/quickstart.md b/docs/tutorials/quickstart.md
deleted file mode 100644
index 8847f50..0000000
--- a/docs/tutorials/quickstart.md
+++ /dev/null
@@ -1,301 +0,0 @@
-# Overview
-RefChef comes with two main commands (`refchef-cook` and `refchef-menu`).
-
-- **refchef-cook** *link to options section* : Will read recipes and execute the commands that will retrieve the references, indices, or annotations.
-- **refchef-menu**: Provides an easy way to summarize the items already on the system.
-
-# Quickstart
-
-See the installation instructions for how to install refchef. Create
-your own local repository for tracking references:
-
-```
-cd /Volumes/jwalla12
-git init local_references
-```
-
-Create a directory for refchef to store your references:
-
-```
-mkdir /Volumes/jwalla12/references
-```
-
-Create a `master.yaml` file and save it in your git repository. This
-file will contain the commands that will be executed to download your
-references, as well as some additional metadata. For more information
-about the details of the .yaml file format, see
-(https://compbiocore.github.io/refchef/specs/).
-
-!!! Note
- the creation of the `final_checksums.md5` file should always be included in the `master.yaml` file. As a minimal example, here is a `master.yaml` file that will download the grch38 human genome from Ensembl:
-
-```
-grch38:
- metadata:
- name: grch38_release87
- species: Homo sapiens
- organization: ensembl
- downloader: jrwallace
- levels:
- references:
- - component: primary
- complete:
- status: false
- commands:
- - wget ftp://ftp.ensembl.org/pub/release-87/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
- - wget ftp://ftp.ensembl.org/pub/release-87/fasta/homo_sapiens/dna/CHECKSUMS
- - md5sum *.gz > postdownload-checksums.md5
- - gunzip *.gz
- - md5sum *.* > final_checksums.md5
-
-```
-In addition to the .yaml file, you will also need to specify the
-following details:
-
-- where you'd like the references to be saved,
-- the local git repository for version control of references, and
-- the remote github repository for version control of reference
- sequences.
-
-!!!tip
- There are a few options for relaying this information to refchef:
- - they can be specified in a `cfg.ini` file or a `cfg.yaml` file
- - or you can pass them as arguments to `refchef-cook`, the command
- that will read your `master.yaml` file and download the references.
-
- !!!note
- The following is an example where arguments are passed to `refchef-cook` and references are not pushed to a remote repository:
- ```
- refchef-cook -e -o /Volumes/jwalla12/references -gl /Volumes/jwalla12/local_references
- ```
-
-!!!todo
- add examples re: using a cfg file and remote repo
-
-Then you'll see the following:
-
-```
-/anaconda3/lib/python3.7/site-packages/refchef/utils.py:12: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.
- dict_ = yaml.load(yml)
- πΆ RefChef... getting reference: grch38, component: primary
-Running command "wget ftp://ftp.ensembl.org/pub/release-87/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
---2019-07-12 15:56:56-- ftp://ftp.ensembl.org/pub/release-87/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
- => βHomo_sapiens.GRCh38.dna.primary_assembly.fa.gzβ
-Resolving ftp.ensembl.org (ftp.ensembl.org)... 193.62.193.8
-Connecting to ftp.ensembl.org (ftp.ensembl.org)|193.62.193.8|:21... connected.
-Logging in as anonymous ... Logged in!
-==> SYST ... done. ==> PWD ... done.
-==> TYPE I ... done. ==> CWD (1) /pub/release-87/fasta/homo_sapiens/dna ... done.
-==> SIZE Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz ... 881214448
-==> PASV ... done. ==> RETR Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz ... done.
-Length: 881214448 (840M) (unauthoritative)
-
-Homo_sapiens.GRCh38.d 100%[=======================>] 840.39M 6.71MB/s in 4m 26s
-
-2019-07-12 16:01:25 (3.16 MB/s) - βHomo_sapiens.GRCh38.dna.primary_assembly.fa.gzβ saved [881214448]
-
-Running command "wget ftp://ftp.ensembl.org/pub/release-87/fasta/homo_sapiens/dna/CHECKSUMS"
---2019-07-12 16:01:25-- ftp://ftp.ensembl.org/pub/release-87/fasta/homo_sapiens/dna/CHECKSUMS
- => βCHECKSUMSβ
-Resolving ftp.ensembl.org (ftp.ensembl.org)... 193.62.193.8
-Connecting to ftp.ensembl.org (ftp.ensembl.org)|193.62.193.8|:21... connected.
-Logging in as anonymous ... Logged in!
-==> SYST ... done. ==> PWD ... done.
-==> TYPE I ... done. ==> CWD (1) /pub/release-87/fasta/homo_sapiens/dna ... done.
-==> SIZE CHECKSUMS ... 5010
-==> PASV ... done. ==> RETR CHECKSUMS ... done.
-Length: 5010 (4.9K) (unauthoritative)
-
-CHECKSUMS 100%[=======================>] 4.89K --.-KB/s in 0s
-
-2019-07-12 16:01:27 (97.5 MB/s) - βCHECKSUMSβ saved [5010]
-
-Running command "md5sum *.gz > postdownload-checksums.md5"
-Running command "gunzip *.gz"
-Running command "md5sum *.* > final_checksums.md5"
-
-```
-
-After this command is run, master.yaml will reflect that you have downloaded the references and it will now look like this:
-
-```
-grch38:
- metadata:
- name: grch38_release87
- species: Homo sapiens
- organization: ensembl
- downloader: jrwallace
- levels:
- references:
- - component: primary
- complete:
- status: true
- time: 2019-07-12 16:02:25.505498
- commands:
- - wget ftp://ftp.ensembl.org/pub/release-87/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
- - wget ftp://ftp.ensembl.org/pub/release-87/fasta/homo_sapiens/dna/CHECKSUMS
- - md5sum *.gz > postdownload-checksums.md5
- - gunzip *.gz
- - md5sum *.* > final_checksums.md5
- location: /Volumes/jwalla12/references/grch38/primary
- files:
- - CHECKSUMS
- - final_checksums.md5
- - Homo_sapiens.GRCh38.dna.primary_assembly.fa
- - metadata.txt
- - postdownload-checksums.md5
-
-```
-
-todo: add information re: adding references already present elsewhere (should the command be more like a cp command?)
-
-#### User workflow diagram
-
-![Diagram](../assets/refchef-diagram.svg)
-
-RefChef comes with two main scripts. `refchef-cook` will parse `master.yaml`, execute the commands listed (download and process reference files), commit, and push the `master.yaml` using git. `refchef-menu` is used to list the references already downloaded and processed. It also provides an easy way to find a reference uuid for use when processing new indices.
-Both scripts can take a `--config (-c)` argument with the path for a config file, that can be one of the following formats:
-
-`cfg.yaml`:
-```yaml
-config-yaml:
- path-settings:
- reference-directory: ~/data/references_dir # directory where references will be downloaded and processed.
- github-directory: ~/data/git_local # local git repository where `master.yaml` is located.
- remote-repository: user/repo # remote user and repository for version control of `master.yaml`
- log-settings:
- log: 'yes'
-```
-
-`cfg.ini`:
-```toml
-[path-settings]
-reference-directory=~/data/references_dir #directory where references will be downloaded and processed.
-git-directory=~/data/git_local #local git repository where `master.yaml` is located.
-remote-repository=user/repo # remote user and repository for version control of `master.yaml`
-[log-settings]
-log=yes
-[runtime-settings]
-break-on-error=yes
-verbose=yes
-```
-
-!!! Note
- You can opt not to use a config file. In that case, when using `refchef-menu`, you must pass the argument `--master (-m)` with he path to the `master.yaml` file.
- When using `refchef-cook`, you must pass at least the output directory (``--outdir, -o`) and the path to the local git directory, where the `master.yaml` file is located (``--git_local, -gl`). If you want the changes to `master.yaml` to be pushed to a git service, you must also pass `--git_remote (-gr)`.
-
-### `refchef-cook`
-
-#### Downloading and processing references, indices, or annotations.
-This command will read a `master.yaml` located in the `github-directory` path from the config file, or the directory passed to `--git_local`. The `master.yaml` file contains a list of references, indices, and annotations, as well as their metadata, and commands necessary to download and process the files (see example below).
-The `master.yaml` file stores all the information about a reference that is downloaded or will be downloaded. When `refchef-cook -e` is executed, the files are downloaded to the output directory and processed. In addition, RefChef updates the status of the complete option to `true` in the `master.yaml`, it also adds an `uuid`, the date, location, and list of files. If a reference has the `true` in the complete status, that entry will not be processed again.
-
-Example `master.yaml` before processing:
-
-```yaml
-reference_test1:
- metadata:
- name: reference_test1
- species: mouse
- organization: ucsc
- downloader: fgelin
- levels:
- references:
- - component: primary
- complete:
- status: false
- commands:
- - wget -nv https://s3.us-east-2.amazonaws.com/refchef-tests/chr1.fa.gz
- - md5 *.fa.gz > postdownload_checksums.md5
- - gunzip *.gz
- - md5 *.fa > final_checksums.md5
-```
-
-Example `master.yaml` after processing:
-```yaml
-reference_test1:
- metadata:
- name: reference_test1
- species: mouse
- organization: ucsc
- downloader: fgelin
- levels:
- references:
- - component: primary
- complete:
- status: true
- time: 2018-12-20 11:14:13.153237
- commands:
- - wget -nv https://s3.us-east-2.amazonaws.com/refchef-tests/chr1.fa.gz
- - md5 *.fa.gz > postdownload_checksums.md5
- - gunzip *.gz
- - md5 *.fa > final_checksums.md5
- location: refchef-data/reference_test1/primary
- files:
- - chr1.fa
- - metadata.txt
- - postdownload_checksums.md5
- - final_checksums.md5
- uuid: 8040b09f-3844-3c42-b765-1f6a32614895
-```
-
-#### Downloading an index linked to a reference.
-
-Indices can be downloaded just like any reference or annotation (see process above), but also, one might download an index that is linked to a particular reference. In that case, the index entry in the `master.yaml` file has a key `src` that takes the `uuid` of the reference to be linked to the index.
-
-Example of index `master.yaml`:
-```yaml
-index_1:
- metadata:
- name: index_test1
- species: mouse
- organization: ucsc
- downloader: fgelin
- levels:
- indices:
- - component: bwa_index
- complete:
- status: false
- src: 8040b09f-3844-3c42-b765-1f6a32614895
- commands:
- - wget -nv https://s3.us-east-2.amazonaws.com/refchef-tests/chr1.fa.gz
- - md5 *.fa.gz > postdownload_checksums.md5
- - gunzip *.gz
- - md5 *.fa > final_checksums.md5
-```
-
-In this case, the commands will be processed like before, but in the reference folder, a symlink to the index folder will be created.
-
-Arguments:
-`--execute, -e`: will execute all commands listed in the `master.yaml` for each reference, if reference doesn't exist in the location provided in the config file.
-`--new, -n`: path to a new yaml file containing other references to be downloaded and appended to the `master.yaml`.
-`--git, -g`: Git action. Choose from `commit` or `push`.
-`--outdir, -o`: output directory, where references will be downloaded to.
-`--git_local, -gl`: Local git directory, where the `master.yaml` file can be found.
-`--git_remote, -gr`: Remote git repository, in the format `user/project_name`.
-`--logs, -l`: Whether to save the log files.
-
-Example run:
- 1 - This will read in `new.yaml` file, append to `master.yaml` and commit the changes using git.
- `refchef-cook --config /path/to/cfg.yaml --execute --new new.yaml --git commit`.
-
- 2 - This will process `master.yaml`, commit and push changes to the remote repository:
- `refchef-cook --execute -o /path/to/output/dir --git_local /path/to/git/dir --git_remote user/project_name --git push`
-
-
-### `refchef-menu`
-This command provides a way for the user to list all references present in the system, based on `master.yaml`, as well as filter the list of references based on metadata options.
-Arguments:
-`--master, -m`: path to `master.yaml` file. Must be used if `--config` argument is not used.
-`--filter`: used to filter references based on metadata. Takes a pair key:value, or a list of pairs separated by comma: `key:value,key2:value2,key3:value3...`
-`--full`: whether to show the full table including files and location of files.
-
-Example:
-
-`refchef-menu`
-
-![menu](../assets/menu-full.png)
-
-`refchef-menu --filter species:human`
-
-![menu](../assets/menu-filtered.png)
diff --git a/docs/usage.md b/docs/usage.md
index d558a87..d473db3 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -1,307 +1,15 @@
-
-## **Overview**
-RefChef is a reference management system that includes additional tools to record the provenance of reference sequences, indices, and annotations. It was created to enable reproducible research.
-RefChef will:
-
-1. Document the exact steps undertaken in the retrieval and processing of genomic references
-2. Maintain the associated metadata
-3. Provide a mechanism for automatically reproducing retrieval and creation of an exact copy of genomic references
-
-**RefChef comes with two commands:**
-
-[**`refchef-cook`**](#refchef-cook):
-Will read recipes and execute the commands that will retrieve the references, indices, or annotations based on the contents of [`master.yaml`](#master.yaml).
-
-[**`refchef-menu`**](#refchef-menu):
-Provides a way for the user to list all references present in the system, based on [`master.yaml`](#master.yaml), as well as filter the list of references based on metadata options.
-![Diagram](assets/refchef-diagram.svg)
-
-
-**RefChef requires a `master.yaml` file:**
-
-In addition to the [`refchef-cook`](#refchef-cook) and [`refchef-menu`](#refchef-menu) commands, RefChef requires a [`master.yaml`](#master.yaml) containing a list of references, indices, annotations, and metadata, as well as the commands necessary to download and process the files. When [`refchef-cook`](#refchef-cook) is executed, RefChef will append the [`master.yaml`](#master.yaml) to change the `complete` option from `false` to `true`and will also add a `uuid` for each reference, the date the files were downloaded and their location, as well as a complete list of files. Based on the arguments you pass to [`refchef-cook`](#refchef-cook), it will either commit those changes to [`master.yaml`](#master.yaml) to a local repository or commit and push the changes to a remote repository.
-
-**RefChef requires configuration information:**
-
-[`refchef-cook`](#refchef-cook) and [`refchef-menu`](#refchef-menu) both require some configuration information, including:
-
-1. Where you'd like the references to be saved
-2. The local git repository for version control of references
-3. The remote github repository for version control of reference
- sequences (optional).
-
-This information can be specified in a [`cfg.yaml`](#cfg.yaml) file, a [`cfg.ini`](#cfg.ini) file, or it can be passed as arguments to [`refchef-cook`](#refchef-cook).
-
-## **Quickstart**
-
-This quickstart assumes that [bwa](http://bio-bwa.sourceforge.net/) and [bowtie2](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml) are installed and in your current path.
-
-Create a [remote repository](https://help.github.com/en/articles/creating-a-new-repository) and [clone it](https://help.github.com/en/articles/cloning-a-repository).
-
-Create a directory for refchef to save your references.
-
-Create a [`master.yaml`](#master.yaml) file and save it in your local git repository directory. Here is a [`master.yaml`](#master.yaml) file that will download a yeast genome from Ensembl:
-
-```yaml
-S_cerevisiae:
- metadata:
- name: S_cerevisiae
- common_name: yeast
- ncbi_taxon_id: 4932
- organism: Saccharomyces cerevisiae
- organization: ensembl
- custom: no
- description: corresponds to genbank id GCA_000146045.2
- downloader: joselynn wallace
- ensembl_release_number: 87
- accession:
- genbank:
- refseq:
- levels:
- references:
- - component: primary
- complete:
- status: false
- commands:
- - wget ftp://ftp.ensembl.org/pub/release-87/fasta/saccharomyces_cerevisiae/dna/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa.gz
- - wget ftp://ftp.ensembl.org/pub/release-87/fasta/saccharomyces_cerevisiae/dna/CHECKSUMS
- - md5 *.gz > postdownload-checksums.md5
- - gunzip *.gz
- - md5 *.* > final_checksums.md5
-```
-Pass the configuration arguments in a config file or directly to [`refchef-cook`](#refchef-cook) (as seen in the following example):
-
-```
-refchef-cook -e -o /Users/jwalla12/references -gl /Users/jwalla12/remote_references -gr jrwallace/remote_references --git commit -l
-```
-
-After [`refchef-cook`](#refchef-cook) is run, [`master.yaml`](#master.yaml) will reflect that you have downloaded the reference and it will now look like this:
-
-```yaml
-S_cerevisiae:
- metadata:
- name: S_cerevisiae
- common_name: yeast
- ncbi_taxon_id: 4932
- organism: Saccharomyces cerevisiae
- organization: ensembl
- custom: false
- description: corresponds to genbank id GCA_000146045.2
- downloader: joselynn wallace
- ensembl_release_number: 87
- accession:
- genbank: null
- refseq: null
- levels:
- references:
- - component: primary
- complete:
- status: true
- time: '2019-07-25 09:08:37.478553'
- commands:
- - wget ftp://ftp.ensembl.org/pub/release-87/fasta/saccharomyces_cerevisiae/dna/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa.gz
- - wget ftp://ftp.ensembl.org/pub/release-87/fasta/saccharomyces_cerevisiae/dna/CHECKSUMS
- - md5 *.gz > postdownload-checksums.md5
- - gunzip *.gz
- - md5 *.* > final_checksums.md5
- location: /Users/jwalla12/references/S_cerevisiae/primary
- files:
- - metadata.txt
- - postdownload-checksums.md5
- - CHECKSUMS
- - final_checksums.md5
- - Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
- uuid: dff337a6-9a1d-3313-8ced-dc6f3bfc9689
-```
-
-Make another .yaml file to create a bowtie2 index of this genome, call the file `bowtie2.yaml`.
-
-```yaml
-S_cerevisiae:
- levels:
- indices:
- - component: bowtie2_index
- complete:
- status: false
- src: dff337a6-9a1d-3313-8ced-dc6f3bfc9689
- commands:
- - mkdir /Users/jwalla12/references/S_cerevisiae/bowtie2_index
- - cd /Users/jwalla12/references/S_cerevisiae/bowtie2_index
- - ln -s /Users/jwalla12/references/S_cerevisiae/primary/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa ./Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
- - bowtie2-build Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa S_cerevisiae
- - md5 ./*.* > ./final_checksums.md5
-```
-
-Then use [`refchef-cook`](#refchef-cook) and specify the new yaml to add to [`master.yaml`](#master.yaml).
-
-```
-refchef-cook -e -o /Users/jwalla12/references -gl /Users/jwalla12/remote_references -gr jrwallace/remote_references -n /Users/jwalla12/remote_references/bowtie2.yaml -g commit -l
-```
-
-Make another .yaml file to create a bwa index of this genome, call the file `bwa.yaml`.
-
-```yaml
-S_cerevisiae:
- levels:
- indices:
- - component: bwa_index
- complete:
- status: false
- src: dff337a6-9a1d-3313-8ced-dc6f3bfc9689
- commands:
- - mkdir /Users/jwalla12/references/S_cerevisiae/bwa_index
- - cd /Users/jwalla12/references/S_cerevisiae/bwa_index
- - ln -s /Users/jwalla12/references/S_cerevisiae/primary/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa ./Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
- - bwa index Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa -p S_cerevisiae
- - md5 ./*.* > ./final_checksums.md5
-```
-
-Then use [`refchef-cook`](#refchef-cook) and specify the new yaml to add to [`master.yaml`](#master.yaml).
-
-```
-refchef-cook -e -o /Users/jwalla12/references -gl /Users/jwalla12/remote_references -gr jrwallace/remote_references -n /Users/jwalla12/remote_references/bwa.yaml -g commit -l
-```
-
-We can also track annotation files for the reference genome. Make the following .yaml file and call it `gtf.yaml`:
-
-```yaml
-S_cerevisiae:
- levels:
- annotations:
- - component: gtf
- complete:
- status: false
- commands:
- - wget ftp://ftp.ensembl.org/pub/release-87/gtf/saccharomyces_cerevisiae/Saccharomyces_cerevisiae.R64-1-1.87.gtf.gz
- - wget ftp://ftp.ensembl.org/pub/release-87/gtf/saccharomyces_cerevisiae/CHECKSUMS
- - md5 *.gz > postdownload-checksums.md5
- - gunzip *.gz
- - md5 *.* > final_checksums.md5
-```
-
-Then use [`refchef-cook`](#refchef-cook) and specify the new yaml to add to [`master.yaml`](#master.yaml).
-
-```
-refchef-cook -e -o /Users/jwalla12/references -gl /Users/jwalla12/remote_references -gr jrwallace/remote_references -n /Users/jwalla12/remote_references/gtf.yaml -g commit -l
-```
-We can see what references are available using [`refchef-menu`](#refchef-menu):
-```
-refchef-menu -f /Users/jwalla12/remote_references/master.yaml
-```
-```
-β πΆ RefChef Menu βββββββββββββββββββββββββ¬ββββββββββββββββ¬ββββββββββββββββββββββββββββββββββββββββββββ¬βββββββββββββββββββββββββββββββββββββββ
-β name β organism β component β description β uuid β
-ββββββββββββββββΌβββββββββββββββββββββββββββΌββββββββββββββββΌββββββββββββββββββββββββββββββββββββββββββββΌβββββββββββββββββββββββββββββββββββββββ€
-β S_cerevisiae β Saccharomyces cerevisiae β gtf β corresponds to genbank id GCA_000146045.2 β 5f7ae94c-2e51-3cc6-bcbf-6e251c75ef2f β
-β S_cerevisiae β Saccharomyces cerevisiae β bowtie2_index β corresponds to genbank id GCA_000146045.2 β 93393699-cb40-3ad7-ac07-ae4bdb1efd3e β
-β S_cerevisiae β Saccharomyces cerevisiae β bwa_index β corresponds to genbank id GCA_000146045.2 β dff337a6-9a1d-3313-8ced-dc6f3bfc9689 β
-β S_cerevisiae β Saccharomyces cerevisiae β primary β corresponds to genbank id GCA_000146045.2 β dff337a6-9a1d-3313-8ced-dc6f3bfc9689 β
-ββββββββββββββββ΄βββββββββββββββββββββββββββ΄ββββββββββββββββ΄ββββββββββββββββββββββββββββββββββββββββββββ΄βββββββββββββββββββββββββββββββββββββββ
-```
-We can also get this information if we look at [`master.yaml`](#master.yaml):
-```yaml
-S_cerevisiae:
- metadata:
- name: S_cerevisiae
- common_name: yeast
- ncbi_taxon_id: 4932
- organism: Saccharomyces cerevisiae
- organization: ensembl
- custom: false
- description: corresponds to genbank id GCA_000146045.2
- downloader: joselynn wallace
- ensembl_release_number: 87
- accession:
- genbank: null
- refseq: null
- levels:
- references:
- - component: primary
- complete:
- status: true
- time: '2019-07-25 16:26:42.700668'
- commands:
- - wget ftp://ftp.ensembl.org/pub/release-87/fasta/saccharomyces_cerevisiae/dna/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa.gz
- - wget ftp://ftp.ensembl.org/pub/release-87/fasta/saccharomyces_cerevisiae/dna/CHECKSUMS
- - md5 *.gz > postdownload-checksums.md5
- - gunzip *.gz
- - md5 *.* > final_checksums.md5
- location: /Users/jwalla12/references/S_cerevisiae/primary
- files:
- - metadata.txt
- - postdownload-checksums.md5
- - CHECKSUMS
- - final_checksums.md5
- - Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
- uuid: dff337a6-9a1d-3313-8ced-dc6f3bfc9689
- indices:
- - component: bowtie2_index
- complete:
- status: true
- time: '2019-07-25 16:26:43.971349'
- src: dff337a6-9a1d-3313-8ced-dc6f3bfc9689
- commands:
- - mkdir /Users/jwalla12/references/yeast_refs/bowtie2_index
- - cd /Users/jwalla12/references/yeast_refs/bowtie2_index
- - ln -s /Users/jwalla12/references/yeast_refs/primary/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
- /Users/jwalla12/references/yeast_refs/bowtie2_index/
- - bowtie2-build /Users/jwalla12/references/yeast_refs/bowtie2_index/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
- S_cerevisiae
- - md5 /Users/jwalla12/references/yeast_refs/bowtie2_index/*.* > /Users/jwalla12/references/yeast_refs/bowtie2_index/final_checksums.md5
- location: /Users/jwalla12/references/S_cerevisiae/bowtie2_index
- files:
- - metadata.txt
- uuid: 84928c3e-af1a-11e9-a45e-8c8590bd206d
- - component: bwa_index
- complete:
- status: true
- time: '2019-07-25 16:26:45.183284'
- src: dff337a6-9a1d-3313-8ced-dc6f3bfc9689
- commands:
- - mkdir /Users/jwalla12/references/yeast_refs/bwa_index
- - cd /Users/jwalla12/references/yeast_refs/bwa_index
- - ln -s /Users/jwalla12/references/yeast_refs/primary/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
- /Users/jwalla12/references/yeast_refs/bwa_index/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
- - bwa index /Users/jwalla12/references/yeast_refs/bwa_index/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
- > /Users/jwalla12/references/yeast_refs/bwa_index/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
- - md5 /Users/jwalla12/references/yeast_refs/bwa_index/*.* > /Users/jwalla12/references/yeast_refs/bwa_index/final_checksums.md5
- location: /Users/jwalla12/references/S_cerevisiae/bwa_index
- files:
- - metadata.txt
- uuid: 854b7780-af1a-11e9-a9f8-8c8590bd206d
- annotations:
- - component: gtf
- complete:
- status: true
- time: '2019-07-25 16:26:54.326082'
- commands:
- - wget ftp://ftp.ensembl.org/pub/release-87/gtf/saccharomyces_cerevisiae/Saccharomyces_cerevisiae.R64-1-1.87.gtf.gz
- - wget ftp://ftp.ensembl.org/pub/release-87/gtf/saccharomyces_cerevisiae/CHECKSUMS
- - md5 *.gz > postdownload-checksums.md5
- - gunzip *.gz
- - md5 *.* > final_checksums.md5
- location: /Users/jwalla12/references/S_cerevisiae/gtf
- files:
- - metadata.txt
- - postdownload-checksums.md5
- - Saccharomyces_cerevisiae.R64-1-1.87.gtf
- - CHECKSUMS
- - final_checksums.md5
- uuid: 5f7ae94c-2e51-3cc6-bcbf-6e251c75ef2f
-```
-## **Usage**
+---
###**refchef-cook**
-Refchef-cook reads recipes (yaml files) and executes the commands that will retrieve and/or process the references, indices, or annotations.
-
+`refchef-cook` reads [`master.yaml`](./inputs.md#master.yaml) and executes the commands that will retrieve and/or process the references, indices, or annotations.
**usage:**
-```
+```bash
refchef-cook [*arguments*]
```
**arguments:**
-```
+```bash
--help, -h Show this help message and exit.
--execute, -e Executes the YAML file (master or new if specified).
@@ -328,18 +36,18 @@ refchef-cook [*arguments*]
2. This will process `master.yaml`, commit and push changes to the remote repository:
`refchef-cook --execute -o /path/to/output/dir --git_local /path/to/git/dir --git_remote user/project_name --git push`.
-
+---
###**refchef-menu**
-Refchef-menu provides a way for the user to list all references present in the system, based on `master.yaml`, as well as filter the list of references based on metadata options. You must specify either `--master, -m` or `--config, -c`.
+`refchef-menu` provides a way for the user to list all references present in the system, based on [`master.yaml`](./inputs.md#master.yaml), as well as filter the list of references based on metadata options. You must specify either `--master, -m` or `--config, -c`.
**usage:**
-```
+```bash
refchef-menu [*arguments*]
```
**arguments:**
-```
+```bash
--help, -h Show this help message and exit.
--filter Field:value pair for filtering menu.
@@ -355,228 +63,11 @@ refchef-menu [*arguments*]
**example:**
1 - This will look at all primary genome references available in the current system:
`refchef-menu -f /Users/jwalla12/remote_references/master.yaml --filter component:primary`
-```
-β πΆ RefChef Menu βββββββββββββββββββββββββ¬ββββββββββββ¬ββββββββββββββββββββββββββββββββββββββββββββ¬βββββββββββββββββββββββββββββββββββββββ
-β name β organism β component β description β uuid β
-ββββββββββββββββΌβββββββββββββββββββββββββββΌββββββββββββΌββββββββββββββββββββββββββββββββββββββββββββΌβββββββββββββββββββββββββββββββββββββββ€
-β S_cerevisiae β Saccharomyces cerevisiae β primary β corresponds to ganbank id GCA_000146045.2 β dff337a6-9a1d-3313-8ced-dc6f3bfc9689 β
-ββββββββββββββββ΄βββββββββββββββββββββββββββ΄ββββββββββββ΄ββββββββββββββββββββββββββββββββββββββββββββ΄βββββββββββββββββββββββββββββββββββββββ
-```
-
-## **Inputs**
-###**master.yaml**
-
-**master.yaml overview**
-Refchef uses YAML files that are composed of nested entry and value pairs -- for example, the entry and value pair `common_name`: `yeast`. The spacing and indentation of the entries and values are meaningful - Refchef uses the convention of using 2 spaces to indent each subsequent level of the entries and values in the YAML. Additionally, the spaces and colons are important: be sure to include a `:` and space between each entry and value. Some entries in the yaml (`- component:`, commands under the `commands` header) will have a preceeding `-` and a space before them, which are required for Refchef to properly process the YAML.
-
-Example `master.yaml` before processing:
-```yaml
-S_cerevisiae:
- metadata:
- name: S_cerevisiae
- common_name: yeast
- ncbi_taxon_id: 4932
- organism: Saccharomyces cerevisiae
- organization: ensembl
- custom: no
- description: corresponds to genbank id GCA_000146045.2
- downloader: joselynn wallace
- ensembl_release_number: 87
- accession:
- genbank:
- refseq:
- levels:
- references:
- - component: primary
- complete:
- status: false
- commands:
- - wget ftp://ftp.ensembl.org/pub/release-87/fasta/saccharomyces_cerevisiae/dna/Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa.gz
- - wget ftp://ftp.ensembl.org/pub/release-87/fasta/saccharomyces_cerevisiae/dna/CHECKSUMS
- - md5 *.gz > postdownload-checksums.md5
- - gunzip *.gz
- - md5 *.* > final_checksums.md5
-```
-**master.yaml keys**
-The first line of the yaml is the `key` for the block of code that follows it (`S_cerevisiae:` in the above example), they key must:
- 1. The `key` must be a string.
- 2. The `key` must match the `name` entry under the `metadata` header of the yaml.
- 3. Each `key` in a given `master.yaml` should be unique.
-
-The string of text entered in the `key` field will be used to create a folder inside the directory you specify as your output in your config file (cfg.ini or cfg.yaml) or `refchef-cook` arguments. In the previous quickstart example, we used `/Users/jwalla12/references` as the output directory for `refchef-cook`. Here is the collapsed file tree that refchef created, note that the folder containing the primary reference is nested inside a folder named `S_cerevisiae` based on the `key`.
-
-```
-./Users/jwalla12/references #this directory is specified in refchef-cook or the config files
-βββ S_cerevisiae
- βββ bowtie2_index
- βββ bwa_index
- βββ gtf
- βββ primary
-
-```
-**master.yaml metadata**
-Right after the `key` is the `metadata` section, which must contain the fields listed below (although not all of them need to be filled out). Also indicated below: if filling the fields out is required, their expected format, and a brief description of their expected content:
-
-```yaml
- metadata:
- name: Required, string, should match `key`, creates a directory in the output folder
- common_name: Required, string, common name of organism or 'none' of not applicable.
- ncbi_taxon_id: Required, integer, based on NCBI conventions, enter 'none' if not applicable
- organism: Required, string, suggest using genus, species, and/or strain identifiers
- organization: Required, string, genome reference database, enter 'none' if a custom assembly
- custom: Required, string, should be 'yes'/'no' to indicate if reference was generated/altered in-house
- description: Required, string, additional pertinent information about reference
- downloader: Required, string, indicates who downloaded reference
- ensembl_release_number: Not required, integer, leave blank if reference is not from ensembl
- accession:
- genbank: Not required, string, leave blank if reference is not from genbank
- refseq: Not required, string, leave blank if reference is not from refseq
-```
-
-!!! Caution
- When running a new YAML file to add additional information to a primary reference, metadata entries present in the initial [`master.yaml`](#master.yaml) file can be omitted (for example, `ncbi_taxon_id:`, `common_name:`). When adding indices or annotations to a primary reference already in [`master.yaml`](#master.yaml), the metadata in [`master.yaml`](#master.yaml) will be overwritten by the metadata in the new.yaml file. This could be helpful in situations where you want to update the metadata fields.
-
-**master.yaml levels**
-The `levels` section contains following fields below. Also indicated below: if filling the fields out is required, their expected format, and a brief description of their expected content:
-
-```yaml
- levels:
- references:
- - component: Required, string, must be either 'primary', 'indices', or 'annotations'
- complete:
- status: Required, string, must be 'true' or 'false', refchef will execute commands only if 'false'
- src: Including the 'src:' field is optional -- use this to indicate the uuid of the primary reference that should be linked to an index file.
- commands:
-
-```
-!!! Caution
- The entry `status` must be set to `false` for Refchef to exeecute the commands in the code block. If it is set to `true`, the code will not execute (even if the -e flag is set). After a code block is executed, the `false` flag will flip to `true` automatically and the `time:` entry will appear under the `status` header. The `time:` header will be populated with the datetime stamp the reference was downloaded.
-
-**master.yaml commands**
-This portion of the `master.yaml` should be populated with the specific commands you want to execute to download and process your reference. Each command should be prepended with a `-` and a space.
-
-!!! Caution
- Each time files are processed using a set of commands in the YAML, the last command must run `md5` on all of the files and direct the output to a file called `final_checksums.md5`.
-
-
-### **cfg.yaml**
-**overview**
-Refchef requires configuration information, which can be passed as arguments or specified in a configuration file. A `cfg.yaml` is one option for configuration and should contain the following fields. Also indicated below: If filling out the field is required, their expected format, and a brief description of their contents.
-
-```yaml
-config-yaml:
- path-settings:
- reference-directory: Required, string, directory where references will be downloaded and processed.
- git-directory: Required, string, directory of local git repository where `master.yaml` is located.
- remote-repository: Not required, string (user/repo), used for remote version control of `master.yaml`
- log-settings:
- log: Required, should be 'yes' or 'no' (in single quotes), indicate if log files should be created.
-```
-**example:**
-```yaml
-config-yaml:
- path-settings:
- reference-directory: /Users/jwalla12/references
- git-directory: /Users/jwalla12/remote_references
- remote-repository: jrwallace/remote_references
- log-settings:
- log: 'yes'
-```
-
-### **cfg.ini**
-**overview**
-Refchef requires configuration information, which can be passed as arguments or specified in a configuration file. A `cfg.ini` is one option for configuration and should contain the following fields. Also indicated below: If filling out the field is required, their expected format, and a brief description of their contents.
-
-`cfg.ini`:
-```toml
-[path-settings]
-reference-directory= Required, string, directory where references will be downloaded and processed.
-git-directory= Required, string, directory of local git repository where `master.yaml` is located.
-remote-repository= Not required, string (user/repo), used for remote version control of `master.yaml`
-[log-settings]
-log= Required, should be 'yes' or 'no' (in single quotes), indicate if log files should be created.
-[runtime-settings]
-break-on-error=yes Required, should be 'yes' or 'no' (in single quotes)
-verbose=yes Required, should be 'yes' or 'no' (in single quotes)
-```
-**example:**
-
-```toml
-[path-settings]
-reference-directory=/Users/jwalla12/references
-git-directory=/Users/jwalla12/remote_references
-remote-repository=jrwallace/remote_references
-[log-settings]
-log=yes
-[runtime-settings]
-break-on-error=yes
-verbose=yes
-```
-
-## **Folders and Files**
-
-Refchef creates several folders based on:
-1. The master.yaml key (which should match the 'name' entry under 'metadata' in master.yaml).
-2. The 'component' entry under 'levels' in master.yaml.
-
- Here is the collapsed file tree that refchef created from the quickstart part of the documentation and what the directory names are based on:
-
-```
-./Users/jwalla12/references #this directory is specified in refchef-cook or the config files
-βββ S_cerevisiae #this is named after the 'key' and the 'name' entry under 'metadata' in master.yaml
- βββ bowtie2_index #this folder is created in the master.yaml `commands` section.
- βββ bwa_index #this folder is created in the master.yaml `commands` section.
- βββ gtf #this folder is created in the master.yaml `commands` section.
- βββ primary #this is named after the 'component' entry under 'levels' in master.yaml
-```
-
-Here is the expanded file tree:
-
-```
-./Users/jwalla12/references
-βββ S_cerevisiae
- βββ bowtie2_index
- βΒ Β βββ metadata.txt
- βββ bwa_index
- βΒ Β βββ metadata.txt
- βββ gtf
- βΒ Β βββ CHECKSUMS
- βΒ Β βββ Saccharomyces_cerevisiae.R64-1-1.87.gtf
- βΒ Β βββ final_checksums.md5
- βΒ Β βββ metadata.txt
- βΒ Β βββ postdownload-checksums.md5
- βββ primary
- βββ CHECKSUMS
- βββ Saccharomyces_cerevisiae.R64-1-1.dna.toplevel.fa
- βββ bowtie2_index -> /Users/jwalla12/references/S_cerevisiae/bowtie2_index
- βββ bwa_index -> /Users/jwalla12/references/S_cerevisiae/bwa_index
- βββ final_checksums.md5
- βββ metadata.txt
- βββ postdownload-checksums.md5
-```
-This indicates that refchef has created symlinked directories for bowtie2 and bwa indices in `/Users/jwalla12/references/S_cerevisiae/primary`. This process (linking reference and index) is triggered by:
-1. The addition of the `src:` line in bowtie2.yaml and bwa.yaml
-2. Specifying the master.yaml `levels` are `indices:` in the master.yaml
-
-If we look at the output from `refchef-menu`, we see the UUID for the primary reference file, which is `dff337a6-9a1d-3313-8ced-dc6f3bfc9689`.
-
-```
+```bash
β πΆ RefChef Menu βββββββββββββββββββββββββ¬ββββββββββββ¬ββββββββββββββββββββββββββββββββββββββββββββ¬βββββββββββββββββββββββββββββββββββββββ
β name β organism β component β description β uuid β
ββββββββββββββββΌβββββββββββββββββββββββββββΌββββββββββββΌββββββββββββββββββββββββββββββββββββββββββββΌβββββββββββββββββββββββββββββββββββββββ€
β S_cerevisiae β Saccharomyces cerevisiae β primary β corresponds to ganbank id GCA_000146045.2 β dff337a6-9a1d-3313-8ced-dc6f3bfc9689 β
ββββββββββββββββ΄βββββββββββββββββββββββββββ΄ββββββββββββ΄ββββββββββββββββββββββββββββββββββββββββββββ΄βββββββββββββββββββββββββββββββββββββββ
```
-In this clipping from bowtie2.yaml, note that the UUID was indicated in the `src:` entry under `component`, `indices`, and `levels`.
-
-```yaml
-S_cerevisiae:
- levels:
- indices:
- - component: bowtie2_index
- complete:
- status: false
- src: dff337a6-9a1d-3313-8ced-dc6f3bfc9689
-```
-This indicates which primary reference was used to create the index file.
\ No newline at end of file
diff --git a/docs/usecases.md b/docs/usecases.md
new file mode 100644
index 0000000..84c8b6c
--- /dev/null
+++ b/docs/usecases.md
@@ -0,0 +1,10 @@
+###**Download reference, local repository `master.yaml` version control:**
+![Diagram](assets/git_commit_usecase.svg)
+###**Download reference, remote repository `master.yaml` version control:**
+![Diagram](assets/git_push_usecase.svg)
+###**Download new reference, local repository `master.yaml` version control:**
+![Diagram](assets/newyaml_usecase.svg)
+###**Add manually downloaded reference, append commands to `master.yaml`, do not execute commands, local repository `master.yaml` version control:**
+![Diagram](assets/noexecute_usecase.svg)
+###**refchef-menu to view references available on the system:**
+![Diagram](assets/refchefmenu_usecase.svg)
diff --git a/mkdocs.yml b/mkdocs.yml
index fb6aa65..a5b56a9 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -28,8 +28,11 @@ markdown_extensions:
nav:
- Home: 'index.md'
- Installation: 'installation.md'
+ - Overview: 'overview.md'
- Usage: 'usage.md'
- - YAML specs: 'specs.md'
+ - Inputs: 'inputs.md'
+ - Folders: 'folders.md'
+ - File specifications: 'specs.md'
+ - Quickstart: 'quickstart.md'
- RefChef serve: 'serve.md'
- - Tutorials:
- - QuickStart: tutorials/quickstart.md
+ - RefChef use cases: 'usecases.md'
diff --git a/scripts/refchef-cook b/scripts/refchef-cook
index 88d5bc5..fb9a667 100644
--- a/scripts/refchef-cook
+++ b/scripts/refchef-cook
@@ -108,18 +108,22 @@ def main():
master = read_menu(conf)
for r in master.keys():
- for i in master[r]['levels']['references']:
- if not i['complete']['status']:
- logging.info(u"""
- -------------------------------------------
- The folowing references will be downloaded:
- - {0}
- ===========================================
- """.format(r))
- else:
- logging.info("""
- No references to download.
- """)
+ for type in ['references', 'indices', 'annotations']:
+ try:
+ for i in master[r]['levels'][type]:
+ if not i['complete']['status']:
+ logging.info(u"""
+ -------------------------------------------
+ The folowing references will be downloaded:
+ - {0}
+ ===========================================
+ """.format(r))
+ else:
+ logging.info("""
+ No references to download.
+ """)
+ except:
+ pass
## Execute, commit and push steps.
if arguments.execute:
diff --git a/scripts/refchef-serve b/scripts/refchef-serve
index 65f2d89..3c91334 100644
--- a/scripts/refchef-serve
+++ b/scripts/refchef-serve
@@ -8,7 +8,7 @@ from refchef import config
parser = argparse.ArgumentParser(description='Get and filter references available in the system.')
-parser.add_argument('--master', '-m', type=str, help='Path do to master.yaml')
+parser.add_argument('--master', '-f', type=str, help='Path do to master.yaml')
parser.add_argument('--config', '-c', type=str, help='Path do to config file in .yaml or .ini format.')
arguments = parser.parse_args()
diff --git a/scripts/templates/table.html b/scripts/templates/table.html
index 5f4eec4..e938d2c 100644
--- a/scripts/templates/table.html
+++ b/scripts/templates/table.html
@@ -99,7 +99,7 @@