diff --git a/README.md b/README.md
index 36f12eba5a..09292558c6 100644
--- a/README.md
+++ b/README.md
@@ -62,8 +62,8 @@ See individual pages for details!
 | doc2query | [+](docs/regressions-msmarco-passage-doc2query.md) |
 | doc2query-T5 | [+](docs/regressions-msmarco-passage-docTTTTTquery.md) | [+](docs/regressions-dl19-passage-docTTTTTquery.md) | [+](docs/regressions-dl20-passage-docTTTTTquery.md) |
 | **Learned sparse lexical (uniCOIL family)** |
-| uniCOIL noexp | [+](docs/regressions-msmarco-passage-unicoil-noexp.md) | [+](docs/regressions-dl19-passage-unicoil-noexp.md) | [+](docs/regressions-dl20-passage-unicoil-noexp.md) |
-| uniCOIL with doc2query-T5 | [+](docs/regressions-msmarco-passage-unicoil.md) | [+](docs/regressions-dl19-passage-unicoil.md) | [+](docs/regressions-dl20-passage-unicoil.md) |
+| uniCOIL noexp | [✓](docs/regressions-msmarco-passage-unicoil-noexp.md) | [✓](docs/regressions-dl19-passage-unicoil-noexp.md) | [✓](docs/regressions-dl20-passage-unicoil-noexp.md) |
+| uniCOIL with doc2query-T5 | [✓](docs/regressions-msmarco-passage-unicoil.md) | [✓](docs/regressions-dl19-passage-unicoil.md) | [✓](docs/regressions-dl20-passage-unicoil.md) |
 | uniCOIL with TILDE | [+](docs/regressions-msmarco-passage-unicoil-tilde-expansion.md) |
 | **Learned sparse lexical (other)** |
 | DeepImpact | [+](docs/regressions-msmarco-passage-deepimpact.md) |
@@ -83,8 +83,8 @@ See individual pages for details!
 | WP baselines | [+](docs/regressions-msmarco-doc-segmented-wp.md) | [+](docs/regressions-dl19-doc-segmented-wp.md) | [+](docs/regressions-dl20-doc-segmented-wp.md) |
 | doc2query-T5 | [+](docs/regressions-msmarco-doc-segmented-docTTTTTquery.md) | [+](docs/regressions-dl19-doc-segmented-docTTTTTquery.md) | [+](docs/regressions-dl20-doc-segmented-docTTTTTquery.md) |
 | **Learned sparse lexical** |
-| uniCOIL noexp | [+](docs/regressions-msmarco-doc-segmented-unicoil-noexp.md) | [+](docs/regressions-dl19-doc-segmented-unicoil-noexp.md) | [+](docs/regressions-dl20-doc-segmented-unicoil-noexp.md) |
-| uniCOIL with doc2query-T5 | [+](docs/regressions-msmarco-doc-segmented-unicoil.md) | [+](docs/regressions-dl19-doc-segmented-unicoil.md) | [+](docs/regressions-dl20-doc-segmented-unicoil.md) |
+| uniCOIL noexp | [✓](docs/regressions-msmarco-doc-segmented-unicoil-noexp.md) | [✓](docs/regressions-dl19-doc-segmented-unicoil-noexp.md) | [✓](docs/regressions-dl20-doc-segmented-unicoil-noexp.md) |
+| uniCOIL with doc2query-T5 | [✓](docs/regressions-msmarco-doc-segmented-unicoil.md) | [✓](docs/regressions-dl19-doc-segmented-unicoil.md) | [✓](docs/regressions-dl20-doc-segmented-unicoil.md) |
 
 ### MS MARCO V2 Passage Corpus
 
@@ -97,8 +97,8 @@ See individual pages for details!
 | baselines | [+](docs/regressions-msmarco-v2-passage-augmented.md) | [+](docs/regressions-dl21-passage-augmented.md) |
 | doc2query-T5 | [+](docs/regressions-msmarco-v2-passage-augmented-d2q-t5.md) | [+](docs/regressions-dl21-passage-augmented-d2q-t5.md) |
 | **Learned sparse lexical** |
-| uniCOIL noexp zero-shot | [+](docs/regressions-msmarco-v2-passage-unicoil-noexp-0shot.md) | [+](docs/regressions-dl21-passage-unicoil-noexp-0shot.md) |
-| uniCOIL with doc2query-T5 zero-shot | [+](docs/regressions-msmarco-v2-passage-unicoil-0shot.md) | [+](docs/regressions-dl21-passage-unicoil-0shot.md) |
+| uniCOIL noexp zero-shot | [✓](docs/regressions-msmarco-v2-passage-unicoil-noexp-0shot.md) | [✓](docs/regressions-dl21-passage-unicoil-noexp-0shot.md) |
+| uniCOIL with doc2query-T5 zero-shot | [✓](docs/regressions-msmarco-v2-passage-unicoil-0shot.md) | [✓](docs/regressions-dl21-passage-unicoil-0shot.md) |
 
 ### MS MARCO V2 Document Corpus
 
@@ -111,8 +111,8 @@ See individual pages for details!
 | baselines | [+](docs/regressions-msmarco-v2-doc-segmented.md) | [+](docs/regressions-dl21-doc-segmented.md) |
 | doc2query-T5 | [+](docs/regressions-msmarco-v2-doc-segmented-d2q-t5.md) | [+](docs/regressions-dl21-doc-segmented-d2q-t5.md) |
 | **Learned sparse lexical** |
-| uniCOIL noexp zero-shot | [+](docs/regressions-msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.md) | [+](docs/regressions-dl21-doc-segmented-unicoil-noexp-0shot-v2.md) |
-| uniCOIL with doc2query-T5 zero-shot | [+](docs/regressions-msmarco-v2-doc-segmented-unicoil-0shot-v2.md) | [+](docs/regressions-dl21-doc-segmented-unicoil-0shot-v2.md) |
+| uniCOIL noexp zero-shot | [✓](docs/regressions-msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.md) | [✓](docs/regressions-dl21-doc-segmented-unicoil-noexp-0shot-v2.md) |
+| uniCOIL with doc2query-T5 zero-shot | [✓](docs/regressions-msmarco-v2-doc-segmented-unicoil-0shot-v2.md) | [✓](docs/regressions-dl21-doc-segmented-unicoil-0shot-v2.md) |
 
 ### Regressions for BEIR (v1.0.0)
 
diff --git a/docs/regressions-dl19-doc-segmented-unicoil-noexp.md b/docs/regressions-dl19-doc-segmented-unicoil-noexp.md
index 1771f3e51c..26b25d10e5 100644
--- a/docs/regressions-dl19-doc-segmented-unicoil-noexp.md
+++ b/docs/regressions-dl19-doc-segmented-unicoil-noexp.md
@@ -20,11 +20,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression dl19-doc-segmented-unicoil-noexp
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO segmented document corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO segmented document corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl19-doc-segmented-unicoil-noexp
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -34,16 +41,13 @@ tar xvf collections/msmarco-doc-segmented-unicoil-noexp.tar -C collections/
 ```
 
 To confirm, `msmarco-doc-segmented-unicoil-noexp.tar` is 11 GB and has MD5 checksum `11b226e1cacd9c8ae0a660fd14cdd710`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression dl19-doc-segmented-unicoil-noexp \
   --corpus-path collections/msmarco-doc-segmented-unicoil-noexp
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/docs/regressions-dl19-doc-segmented-unicoil.md b/docs/regressions-dl19-doc-segmented-unicoil.md
index 1bc3d1055e..ccbdd8860f 100644
--- a/docs/regressions-dl19-doc-segmented-unicoil.md
+++ b/docs/regressions-dl19-doc-segmented-unicoil.md
@@ -20,11 +20,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression dl19-doc-segmented-unicoil
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO segmented document corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO segmented document corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl19-doc-segmented-unicoil
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -34,16 +41,13 @@ tar xvf collections/msmarco-doc-segmented-unicoil.tar -C collections/
 ```
 
 To confirm, `msmarco-doc-segmented-unicoil.tar` is 19 GB and has MD5 checksum `6a00e2c0c375cb1e52c83ae5ac377ebb`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression dl19-doc-segmented-unicoil \
   --corpus-path collections/msmarco-doc-segmented-unicoil
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/docs/regressions-dl19-passage-unicoil-noexp.md b/docs/regressions-dl19-passage-unicoil-noexp.md
index 9eabd6ea04..3d5ac98053 100644
--- a/docs/regressions-dl19-passage-unicoil-noexp.md
+++ b/docs/regressions-dl19-passage-unicoil-noexp.md
@@ -22,11 +22,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression dl19-passage-unicoil-noexp
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage-unicoil-noexp
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -36,16 +43,13 @@ tar xvf collections/msmarco-passage-unicoil-noexp.tar -C collections/
 ```
 
 To confirm, `msmarco-passage-unicoil-noexp.tar` is 2.7 GB and has MD5 checksum `f17ddd8c7c00ff121c3c3b147d2e17d8`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression dl19-passage-unicoil-noexp \
   --corpus-path collections/msmarco-passage-unicoil-noexp
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/docs/regressions-dl19-passage-unicoil.md b/docs/regressions-dl19-passage-unicoil.md
index 1a650e63a5..66f2292f8b 100644
--- a/docs/regressions-dl19-passage-unicoil.md
+++ b/docs/regressions-dl19-passage-unicoil.md
@@ -22,11 +22,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression dl19-passage-unicoil
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl19-passage-unicoil
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -36,16 +43,13 @@ tar xvf collections/msmarco-passage-unicoil.tar -C collections/
 ```
 
 To confirm, `msmarco-passage-unicoil.tar` is 3.4 GB and has MD5 checksum `78eef752c78c8691f7d61600ceed306f`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression dl19-passage-unicoil \
   --corpus-path collections/msmarco-passage-unicoil
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/docs/regressions-dl20-doc-segmented-unicoil-noexp.md b/docs/regressions-dl20-doc-segmented-unicoil-noexp.md
index 419df463d5..106c9a6723 100644
--- a/docs/regressions-dl20-doc-segmented-unicoil-noexp.md
+++ b/docs/regressions-dl20-doc-segmented-unicoil-noexp.md
@@ -20,11 +20,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented-unicoil-noexp
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO segmented document corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO segmented document corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl20-doc-segmented-unicoil-noexp
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -34,16 +41,13 @@ tar xvf collections/msmarco-doc-segmented-unicoil-noexp.tar -C collections/
 ```
 
 To confirm, `msmarco-doc-segmented-unicoil-noexp.tar` is 11 GB and has MD5 checksum `11b226e1cacd9c8ae0a660fd14cdd710`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented-unicoil-noexp \
   --corpus-path collections/msmarco-doc-segmented-unicoil-noexp
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/docs/regressions-dl20-doc-segmented-unicoil.md b/docs/regressions-dl20-doc-segmented-unicoil.md
index 2247fbc143..e908120a1b 100644
--- a/docs/regressions-dl20-doc-segmented-unicoil.md
+++ b/docs/regressions-dl20-doc-segmented-unicoil.md
@@ -20,11 +20,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented-unicoil
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO segmented document corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO segmented document corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl20-doc-segmented-unicoil
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -34,16 +41,13 @@ tar xvf collections/msmarco-doc-segmented-unicoil.tar -C collections/
 ```
 
 To confirm, `msmarco-doc-segmented-unicoil.tar` is 19 GB and has MD5 checksum `6a00e2c0c375cb1e52c83ae5ac377ebb`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression dl20-doc-segmented-unicoil \
   --corpus-path collections/msmarco-doc-segmented-unicoil
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/docs/regressions-dl20-passage-unicoil-noexp.md b/docs/regressions-dl20-passage-unicoil-noexp.md
index 50395ec0eb..317f16f0c6 100644
--- a/docs/regressions-dl20-passage-unicoil-noexp.md
+++ b/docs/regressions-dl20-passage-unicoil-noexp.md
@@ -22,11 +22,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression dl20-passage-unicoil-noexp
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage-unicoil-noexp
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -36,16 +43,13 @@ tar xvf collections/msmarco-passage-unicoil-noexp.tar -C collections/
 ```
 
 To confirm, `msmarco-passage-unicoil-noexp.tar` is 2.7 GB and has MD5 checksum `f17ddd8c7c00ff121c3c3b147d2e17d8`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression dl20-passage-unicoil-noexp \
   --corpus-path collections/msmarco-passage-unicoil-noexp
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/docs/regressions-dl20-passage-unicoil.md b/docs/regressions-dl20-passage-unicoil.md
index 8de5d646c5..2d11316e2d 100644
--- a/docs/regressions-dl20-passage-unicoil.md
+++ b/docs/regressions-dl20-passage-unicoil.md
@@ -22,11 +22,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression dl20-passage-unicoil
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl20-passage-unicoil
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -36,16 +43,13 @@ tar xvf collections/msmarco-passage-unicoil.tar -C collections/
 ```
 
 To confirm, `msmarco-passage-unicoil.tar` is 3.4 GB and has MD5 checksum `78eef752c78c8691f7d61600ceed306f`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression dl20-passage-unicoil \
   --corpus-path collections/msmarco-passage-unicoil
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/docs/regressions-dl21-doc-segmented-unicoil-0shot-v2.md b/docs/regressions-dl21-doc-segmented-unicoil-0shot-v2.md
index 7a1296ebf0..f6cac75cbd 100644
--- a/docs/regressions-dl21-doc-segmented-unicoil-0shot-v2.md
+++ b/docs/regressions-dl21-doc-segmented-unicoil-0shot-v2.md
@@ -23,15 +23,26 @@ Note that this page is automatically generated from [this template](../src/main/
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented-unicoil-0shot-v2
 ```
 
-## Corpus
+We make available a version of the MS MARCO V2 segmented document corpus that has already been processed with uniCOIL (per above), i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl21-doc-segmented-unicoil-0shot-v2
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_0shot_v2.tar -P collections/
 
@@ -43,12 +54,18 @@ mv collections/msmarco_v2_doc_segmented_unicoil_0shot_v2 collections/msmarco-v2-
 ```
 
 To confirm, `msmarco_v2_doc_segmented_unicoil_0shot_v2.tar` is 72 GB and has an MD5 checksum of `c5639748c2cbad0152e10b0ebde3b804`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented-unicoil-0shot-v2 \
+  --corpus-path collections/msmarco-v2-doc-segmented-unicoil-0shot-v2
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 target/appassembler/bin/IndexCollection \
   -collection JsonVectorCollection \
   -input /path/to/msmarco-v2-doc-segmented-unicoil-0shot-v2 \
@@ -58,7 +75,7 @@ target/appassembler/bin/IndexCollection \
   >& logs/log.msmarco-v2-doc-segmented-unicoil-0shot-v2 &
 ```
 
-The path `/path/to/msmarco-v2-doc-segmented-unicoil-0shot/` should point to the corpus downloaded above.
+The path `/path/to/msmarco-v2-doc-segmented-unicoil-0shot-v2/` should point to the corpus downloaded above.
 
 The important indexing options to note here are `-impact -pretokenized`: the first tells Anserini not to encode BM25 doclengths into Lucene's norms (which is the default) and the second option says not to apply any additional tokenization on the uniCOIL tokens.
 Upon completion, we should have an index with 124,131,414 documents.
@@ -73,7 +90,7 @@ The original data can be found [here](https://trec.nist.gov/data/deep2021.html).
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 target/appassembler/bin/SearchCollection \
   -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot-v2/ \
   -topics src/main/resources/topics-and-qrels/topics.dl21.unicoil.0shot.tsv.gz \
@@ -84,7 +101,7 @@ target/appassembler/bin/SearchCollection \
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m map src/main/resources/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot.topics.dl21.unicoil.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.100 src/main/resources/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot.topics.dl21.unicoil.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.1000 src/main/resources/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot.topics.dl21.unicoil.0shot.txt
diff --git a/docs/regressions-dl21-doc-segmented-unicoil-0shot.md b/docs/regressions-dl21-doc-segmented-unicoil-0shot.md
index 61117937de..ec29fae92c 100644
--- a/docs/regressions-dl21-doc-segmented-unicoil-0shot.md
+++ b/docs/regressions-dl21-doc-segmented-unicoil-0shot.md
@@ -22,15 +22,26 @@ Note that this page is automatically generated from [this template](../src/main/
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented-unicoil-0shot
 ```
 
-## Corpus
+We make available a version of the MS MARCO V2 segmented document corpus that has already been processed with uniCOIL (per above), i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl21-doc-segmented-unicoil-0shot
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_0shot.tar -P collections/
 
@@ -42,12 +53,18 @@ mv collections/msmarco_v2_doc_segmented_unicoil_0shot collections/msmarco-v2-doc
 ```
 
 To confirm, `msmarco_v2_doc_segmented_unicoil_0shot.tar` is 62 GB and has an MD5 checksum of `889db095113cc4fe152382ccff73304a`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented-unicoil-0shot \
+  --corpus-path collections/msmarco-v2-doc-segmented-unicoil-0shot
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 target/appassembler/bin/IndexCollection \
   -collection JsonVectorCollection \
   -input /path/to/msmarco-v2-doc-segmented-unicoil-0shot \
@@ -72,7 +89,7 @@ The original data can be found [here](https://trec.nist.gov/data/deep2021.html).
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 target/appassembler/bin/SearchCollection \
   -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot/ \
   -topics src/main/resources/topics-and-qrels/topics.dl21.unicoil.0shot.tsv.gz \
@@ -83,7 +100,7 @@ target/appassembler/bin/SearchCollection \
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m map src/main/resources/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot.unicoil-0shot.topics.dl21.unicoil.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.100 src/main/resources/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot.unicoil-0shot.topics.dl21.unicoil.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.1000 src/main/resources/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot.unicoil-0shot.topics.dl21.unicoil.0shot.txt
diff --git a/docs/regressions-dl21-doc-segmented-unicoil-noexp-0shot-v2.md b/docs/regressions-dl21-doc-segmented-unicoil-noexp-0shot-v2.md
index ac9b75e22f..b7ca97fda9 100644
--- a/docs/regressions-dl21-doc-segmented-unicoil-noexp-0shot-v2.md
+++ b/docs/regressions-dl21-doc-segmented-unicoil-noexp-0shot-v2.md
@@ -23,15 +23,26 @@ Note that this page is automatically generated from [this template](../src/main/
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented-unicoil-noexp-0shot-v2
 ```
 
-## Corpus
+We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have performed model inference on every document and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl21-doc-segmented-unicoil-noexp-0shot-v2
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar -P collections/
 
@@ -43,12 +54,18 @@ mv collections/msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2 collections/msmar
 ```
 
 To confirm, `msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar` is 55 GB and has an MD5 checksum of `97ba262c497164de1054f357caea0c63`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented-unicoil-noexp-0shot-v2 \
+  --corpus-path collections/msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 target/appassembler/bin/IndexCollection \
   -collection JsonVectorCollection \
   -input /path/to/msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2 \
@@ -58,7 +75,7 @@ target/appassembler/bin/IndexCollection \
   >& logs/log.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2 &
 ```
 
-The path `/path/to/msmarco-v2-doc-segmented-unicoil-noexp-0shot/` should point to the corpus downloaded above.
+The path `/path/to/msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2/` should point to the corpus downloaded above.
 
 The important indexing options to note here are `-impact -pretokenized`: the first tells Anserini not to encode BM25 doclengths into Lucene's norms (which is the default) and the second option says not to apply any additional tokenization on the uniCOIL tokens.
 Upon completion, we should have an index with 124,131,404 documents.
@@ -73,7 +90,7 @@ The original data can be found [here](https://trec.nist.gov/data/deep2021.html).
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 target/appassembler/bin/SearchCollection \
   -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2/ \
   -topics src/main/resources/topics-and-qrels/topics.dl21.unicoil-noexp.0shot.tsv.gz \
@@ -84,7 +101,7 @@ target/appassembler/bin/SearchCollection \
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m map src/main/resources/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot.topics.dl21.unicoil-noexp.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.100 src/main/resources/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot.topics.dl21.unicoil-noexp.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.1000 src/main/resources/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot.topics.dl21.unicoil-noexp.0shot.txt
diff --git a/docs/regressions-dl21-doc-segmented-unicoil-noexp-0shot.md b/docs/regressions-dl21-doc-segmented-unicoil-noexp-0shot.md
index cd34f6c92e..e54c96f80d 100644
--- a/docs/regressions-dl21-doc-segmented-unicoil-noexp-0shot.md
+++ b/docs/regressions-dl21-doc-segmented-unicoil-noexp-0shot.md
@@ -22,15 +22,26 @@ Note that this page is automatically generated from [this template](../src/main/
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented-unicoil-noexp-0shot
 ```
 
-## Corpus
+We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have performed model inference on every document and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl21-doc-segmented-unicoil-noexp-0shot
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar -P collections/
 
@@ -42,12 +53,18 @@ mv collections/msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2 collections/msmar
 ```
 
 To confirm, `msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar` is 55 GB and has an MD5 checksum of `97ba262c497164de1054f357caea0c63`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl21-doc-segmented-unicoil-noexp-0shot \
+  --corpus-path collections/msmarco-v2-doc-segmented-unicoil-noexp-0shot
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 target/appassembler/bin/IndexCollection \
   -collection JsonVectorCollection \
   -input /path/to/msmarco-v2-doc-segmented-unicoil-noexp-0shot \
@@ -72,7 +89,7 @@ The original data can be found [here](https://trec.nist.gov/data/deep2021.html).
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 target/appassembler/bin/SearchCollection \
   -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot/ \
   -topics src/main/resources/topics-and-qrels/topics.dl21.unicoil-noexp.0shot.tsv.gz \
@@ -83,7 +100,7 @@ target/appassembler/bin/SearchCollection \
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m map src/main/resources/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.dl21.unicoil-noexp.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.100 src/main/resources/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.dl21.unicoil-noexp.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.1000 src/main/resources/topics-and-qrels/qrels.dl21-doc.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.dl21.unicoil-noexp.0shot.txt
diff --git a/docs/regressions-dl21-passage-unicoil-0shot.md b/docs/regressions-dl21-passage-unicoil-0shot.md
index 826cf775b5..bc35d2c913 100644
--- a/docs/regressions-dl21-passage-unicoil-0shot.md
+++ b/docs/regressions-dl21-passage-unicoil-0shot.md
@@ -17,15 +17,26 @@ Note that this page is automatically generated from [this template](../src/main/
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression dl21-passage-unicoil-0shot
 ```
 
-## Corpus
+We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl21-passage-unicoil-0shot
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_passage_unicoil_0shot.tar -P collections/
 
@@ -37,12 +48,18 @@ mv collections/msmarco_v2_passage_unicoil_0shot collections/msmarco-v2-passage-u
 ```
 
 To confirm, `msmarco_v2_passage_unicoil_0shot.tar` is 41 GB and has an MD5 checksum of `1949a00bfd5e1f1a230a04bbc1f01539`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl21-passage-unicoil-0shot \
+  --corpus-path collections/msmarco-v2-passage-unicoil-0shot
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 target/appassembler/bin/IndexCollection \
   -collection JsonVectorCollection \
   -input /path/to/msmarco-v2-passage-unicoil-0shot \
@@ -67,7 +84,7 @@ The original data can be found [here](https://trec.nist.gov/data/deep2021.html).
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 target/appassembler/bin/SearchCollection \
   -index indexes/lucene-index.msmarco-v2-passage-unicoil-0shot/ \
   -topics src/main/resources/topics-and-qrels/topics.dl21.unicoil.0shot.tsv.gz \
@@ -78,7 +95,7 @@ target/appassembler/bin/SearchCollection \
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m map -l 2 src/main/resources/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot.topics.dl21.unicoil.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m recip_rank -l 2 src/main/resources/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot.topics.dl21.unicoil.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -m ndcg_cut.10 src/main/resources/topics-and-qrels/qrels.dl21-passage.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot.topics.dl21.unicoil.0shot.txt
diff --git a/docs/regressions-dl21-passage-unicoil-noexp-0shot.md b/docs/regressions-dl21-passage-unicoil-noexp-0shot.md
index 8914dfed79..845ba2a64c 100644
--- a/docs/regressions-dl21-passage-unicoil-noexp-0shot.md
+++ b/docs/regressions-dl21-passage-unicoil-noexp-0shot.md
@@ -17,15 +17,26 @@ Note that this page is automatically generated from [this template](../src/main/
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression dl21-passage-unicoil-noexp-0shot
 ```
 
-## Corpus
+We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression dl21-passage-unicoil-noexp-0shot
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_passage_unicoil_noexp_0shot.tar -P collections/
 
@@ -37,6 +48,12 @@ mv collections/msmarco_v2_passage_unicoil_noexp_0shot collections/msmarco-v2-pas
 ```
 
 To confirm, `msmarco_v2_passage_unicoil_noexp_0shot.tar` is 24 GB and has an MD5 checksum of `d9cc1ed3049746e68a2c91bf90e5212d`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression dl21-passage-unicoil-noexp-0shot \
+  --corpus-path collections/msmarco-v2-passage-unicoil-noexp-0shot
+```
 
 ## Indexing
 
diff --git a/docs/regressions-msmarco-doc-segmented-unicoil-noexp.md b/docs/regressions-msmarco-doc-segmented-unicoil-noexp.md
index 30244b7eee..adb9bac6fb 100644
--- a/docs/regressions-msmarco-doc-segmented-unicoil-noexp.md
+++ b/docs/regressions-msmarco-doc-segmented-unicoil-noexp.md
@@ -20,11 +20,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-doc-segmented-unicoil-noexp
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO segmented document corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-doc-segmented-unicoil-noexp
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -34,16 +41,13 @@ tar xvf collections/msmarco-doc-segmented-unicoil-noexp.tar -C collections/
 ```
 
 To confirm, `msmarco-doc-segmented-unicoil-noexp.tar` is 11 GB and has MD5 checksum `11b226e1cacd9c8ae0a660fd14cdd710`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-doc-segmented-unicoil-noexp \
   --corpus-path collections/msmarco-doc-segmented-unicoil-noexp
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/docs/regressions-msmarco-doc-segmented-unicoil.md b/docs/regressions-msmarco-doc-segmented-unicoil.md
index 645edb8ece..a13456693b 100644
--- a/docs/regressions-msmarco-doc-segmented-unicoil.md
+++ b/docs/regressions-msmarco-doc-segmented-unicoil.md
@@ -20,11 +20,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-doc-segmented-unicoil
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO segmented document corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-doc-segmented-unicoil
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -34,16 +41,13 @@ tar xvf collections/msmarco-doc-segmented-unicoil.tar -C collections/
 ```
 
 To confirm, `msmarco-doc-segmented-unicoil.tar` is 19 GB and has MD5 checksum `6a00e2c0c375cb1e52c83ae5ac377ebb`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-doc-segmented-unicoil \
   --corpus-path collections/msmarco-doc-segmented-unicoil
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/docs/regressions-msmarco-passage-unicoil-noexp.md b/docs/regressions-msmarco-passage-unicoil-noexp.md
index 993d53d7c5..54f7b1f187 100644
--- a/docs/regressions-msmarco-passage-unicoil-noexp.md
+++ b/docs/regressions-msmarco-passage-unicoil-noexp.md
@@ -19,11 +19,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-passage-unicoil-noexp
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-passage-unicoil-noexp
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -33,16 +40,13 @@ tar xvf collections/msmarco-passage-unicoil-noexp.tar -C collections/
 ```
 
 To confirm, `msmarco-passage-unicoil-noexp.tar` is 2.7 GB and has MD5 checksum `f17ddd8c7c00ff121c3c3b147d2e17d8`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-passage-unicoil-noexp \
   --corpus-path collections/msmarco-passage-unicoil-noexp
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/docs/regressions-msmarco-passage-unicoil.md b/docs/regressions-msmarco-passage-unicoil.md
index 1bba680865..d30fa85c9d 100644
--- a/docs/regressions-msmarco-passage-unicoil.md
+++ b/docs/regressions-msmarco-passage-unicoil.md
@@ -16,11 +16,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-passage-unicoil
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-passage-unicoil
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -30,16 +37,13 @@ tar xvf collections/msmarco-passage-unicoil.tar -C collections/
 ```
 
 To confirm, `msmarco-passage-unicoil.tar` is 3.4 GB and has MD5 checksum `78eef752c78c8691f7d61600ceed306f`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-passage-unicoil \
   --corpus-path collections/msmarco-passage-unicoil
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/docs/regressions-msmarco-v2-doc-segmented-unicoil-0shot-v2.md b/docs/regressions-msmarco-v2-doc-segmented-unicoil-0shot-v2.md
index bc246ac91d..afa5cbcc2f 100644
--- a/docs/regressions-msmarco-v2-doc-segmented-unicoil-0shot-v2.md
+++ b/docs/regressions-msmarco-v2-doc-segmented-unicoil-0shot-v2.md
@@ -20,15 +20,26 @@ Note that this page is automatically generated from [this template](../src/main/
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented-unicoil-0shot-v2
 ```
 
-## Corpus
+We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-doc-segmented-unicoil-0shot-v2
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_0shot_v2.tar -P collections/
 
@@ -40,12 +51,18 @@ mv collections/msmarco_v2_doc_segmented_unicoil_0shot_v2 collections/msmarco-v2-
 ```
 
 To confirm, `msmarco_v2_doc_segmented_unicoil_0shot_v2.tar` is 72 GB and has an MD5 checksum of `c5639748c2cbad0152e10b0ebde3b804`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented-unicoil-0shot-v2 \
+  --corpus-path collections/msmarco-v2-doc-segmented-unicoil-0shot-v2
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 target/appassembler/bin/IndexCollection \
   -collection JsonVectorCollection \
   -input /path/to/msmarco-v2-doc-segmented-unicoil-0shot-v2 \
@@ -69,7 +86,7 @@ These regression experiments use the [dev queries](../src/main/resources/topics-
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 target/appassembler/bin/SearchCollection \
   -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot-v2/ \
   -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.unicoil.0shot.tsv.gz \
@@ -86,7 +103,7 @@ target/appassembler/bin/SearchCollection \
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.100 src/main/resources/topics-and-qrels/qrels.msmarco-v2-doc.dev.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot.topics.msmarco-v2-doc.dev.unicoil.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.1000 src/main/resources/topics-and-qrels/qrels.msmarco-v2-doc.dev.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot.topics.msmarco-v2-doc.dev.unicoil.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank src/main/resources/topics-and-qrels/qrels.msmarco-v2-doc.dev.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot-v2.unicoil-0shot.topics.msmarco-v2-doc.dev.unicoil.0shot.txt
diff --git a/docs/regressions-msmarco-v2-doc-segmented-unicoil-0shot.md b/docs/regressions-msmarco-v2-doc-segmented-unicoil-0shot.md
index dea6944c0c..7be83951dc 100644
--- a/docs/regressions-msmarco-v2-doc-segmented-unicoil-0shot.md
+++ b/docs/regressions-msmarco-v2-doc-segmented-unicoil-0shot.md
@@ -19,15 +19,26 @@ Note that this page is automatically generated from [this template](../src/main/
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented-unicoil-0shot
 ```
 
-## Corpus
+We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-doc-segmented-unicoil-0shot
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_0shot.tar -P collections/
 
@@ -39,12 +50,18 @@ mv collections/msmarco_v2_doc_segmented_unicoil_0shot collections/msmarco-v2-doc
 ```
 
 To confirm, `msmarco_v2_doc_segmented_unicoil_0shot.tar` is 62 GB and has an MD5 checksum of `889db095113cc4fe152382ccff73304a`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented-unicoil-0shot \
+  --corpus-path collections/msmarco-v2-doc-segmented-unicoil-0shot
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 target/appassembler/bin/IndexCollection \
   -collection JsonVectorCollection \
   -input /path/to/msmarco-v2-doc-segmented-unicoil-0shot \
@@ -68,7 +85,7 @@ These regression experiments use the [dev queries](../src/main/resources/topics-
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 target/appassembler/bin/SearchCollection \
   -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot/ \
   -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.unicoil.0shot.tsv.gz \
@@ -85,7 +102,7 @@ target/appassembler/bin/SearchCollection \
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.100 src/main/resources/topics-and-qrels/qrels.msmarco-v2-doc.dev.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot.unicoil-0shot.topics.msmarco-v2-doc.dev.unicoil.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.1000 src/main/resources/topics-and-qrels/qrels.msmarco-v2-doc.dev.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot.unicoil-0shot.topics.msmarco-v2-doc.dev.unicoil.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank src/main/resources/topics-and-qrels/qrels.msmarco-v2-doc.dev.txt runs/run.msmarco-v2-doc-segmented-unicoil-0shot.unicoil-0shot.topics.msmarco-v2-doc.dev.unicoil.0shot.txt
diff --git a/docs/regressions-msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.md b/docs/regressions-msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.md
index de64fc89a9..d843786ed9 100644
--- a/docs/regressions-msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.md
+++ b/docs/regressions-msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.md
@@ -20,15 +20,26 @@ Note that this page is automatically generated from [this template](../src/main/
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2
 ```
 
-## Corpus
+We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have performed model inference on every document and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar -P collections/
 
@@ -40,12 +51,18 @@ mv collections/msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2 collections/msmar
 ```
 
 To confirm, `msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar` is 55 GB and has an MD5 checksum of `97ba262c497164de1054f357caea0c63`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2 \
+  --corpus-path collections/msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 target/appassembler/bin/IndexCollection \
   -collection JsonVectorCollection \
   -input /path/to/msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2 \
@@ -69,7 +86,7 @@ These regression experiments use the [dev queries](../src/main/resources/topics-
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 target/appassembler/bin/SearchCollection \
   -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2/ \
   -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.unicoil-noexp.0shot.tsv.gz \
@@ -86,7 +103,7 @@ target/appassembler/bin/SearchCollection \
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.100 src/main/resources/topics-and-qrels/qrels.msmarco-v2-doc.dev.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot.topics.msmarco-v2-doc.dev.unicoil-noexp.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.1000 src/main/resources/topics-and-qrels/qrels.msmarco-v2-doc.dev.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot.topics.msmarco-v2-doc.dev.unicoil-noexp.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank src/main/resources/topics-and-qrels/qrels.msmarco-v2-doc.dev.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.unicoil-noexp-0shot.topics.msmarco-v2-doc.dev.unicoil-noexp.0shot.txt
diff --git a/docs/regressions-msmarco-v2-doc-segmented-unicoil-noexp-0shot.md b/docs/regressions-msmarco-v2-doc-segmented-unicoil-noexp-0shot.md
index 6f8c8a1bd9..1b55757ee4 100644
--- a/docs/regressions-msmarco-v2-doc-segmented-unicoil-noexp-0shot.md
+++ b/docs/regressions-msmarco-v2-doc-segmented-unicoil-noexp-0shot.md
@@ -19,15 +19,26 @@ Note that this page is automatically generated from [this template](../src/main/
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented-unicoil-noexp-0shot
 ```
 
-## Corpus
+We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have performed model inference on every document and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-doc-segmented-unicoil-noexp-0shot
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_noexp_0shot.tar -P collections/
 
@@ -39,12 +50,18 @@ mv collections/msmarco_v2_doc_segmented_unicoil_noexp_0shot collections/msmarco-
 ```
 
 To confirm, `msmarco_v2_doc_segmented_unicoil_noexp_0shot.tar` is 54 GB and has an MD5 checksum of `28261587d6afde56efd8df4f950e7fb4`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-doc-segmented-unicoil-noexp-0shot \
+  --corpus-path collections/msmarco-v2-doc-segmented-unicoil-noexp-0shot
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 target/appassembler/bin/IndexCollection \
   -collection JsonVectorCollection \
   -input /path/to/msmarco-v2-doc-segmented-unicoil-noexp-0shot \
@@ -68,7 +85,7 @@ These regression experiments use the [dev queries](../src/main/resources/topics-
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 target/appassembler/bin/SearchCollection \
   -index indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot/ \
   -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-doc.dev.unicoil-noexp.0shot.tsv.gz \
@@ -85,7 +102,7 @@ target/appassembler/bin/SearchCollection \
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.100 src/main/resources/topics-and-qrels/qrels.msmarco-v2-doc.dev.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.msmarco-v2-doc.dev.unicoil-noexp.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.1000 src/main/resources/topics-and-qrels/qrels.msmarco-v2-doc.dev.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.msmarco-v2-doc.dev.unicoil-noexp.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank src/main/resources/topics-and-qrels/qrels.msmarco-v2-doc.dev.txt runs/run.msmarco-v2-doc-segmented-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.msmarco-v2-doc.dev.unicoil-noexp.0shot.txt
diff --git a/docs/regressions-msmarco-v2-passage-unicoil-0shot.md b/docs/regressions-msmarco-v2-passage-unicoil-0shot.md
index f4a6b55364..983ff2501c 100644
--- a/docs/regressions-msmarco-v2-passage-unicoil-0shot.md
+++ b/docs/regressions-msmarco-v2-passage-unicoil-0shot.md
@@ -14,15 +14,26 @@ Note that this page is automatically generated from [this template](../src/main/
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage-unicoil-0shot
 ```
 
-## Corpus
+We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-passage-unicoil-0shot
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_passage_unicoil_0shot.tar -P collections/
 
@@ -34,12 +45,18 @@ mv collections/msmarco_v2_passage_unicoil_0shot collections/msmarco-v2-passage-u
 ```
 
 To confirm, `msmarco_v2_passage_unicoil_0shot.tar` is 41 GB and has an MD5 checksum of `1949a00bfd5e1f1a230a04bbc1f01539`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage-unicoil-0shot \
+  --corpus-path collections/msmarco-v2-passage-unicoil-0shot
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 target/appassembler/bin/IndexCollection \
   -collection JsonVectorCollection \
   -input /path/to/msmarco-v2-passage-unicoil-0shot \
@@ -63,7 +80,7 @@ These regression experiments use the [dev queries](../src/main/resources/topics-
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 target/appassembler/bin/SearchCollection \
   -index indexes/lucene-index.msmarco-v2-passage-unicoil-0shot/ \
   -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.unicoil.0shot.tsv.gz \
@@ -80,7 +97,7 @@ target/appassembler/bin/SearchCollection \
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.100 src/main/resources/topics-and-qrels/qrels.msmarco-v2-passage.dev.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot.topics.msmarco-v2-passage.dev.unicoil.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.1000 src/main/resources/topics-and-qrels/qrels.msmarco-v2-passage.dev.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot.topics.msmarco-v2-passage.dev.unicoil.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank src/main/resources/topics-and-qrels/qrels.msmarco-v2-passage.dev.txt runs/run.msmarco-v2-passage-unicoil-0shot.unicoil-0shot.topics.msmarco-v2-passage.dev.unicoil.0shot.txt
diff --git a/docs/regressions-msmarco-v2-passage-unicoil-noexp-0shot.md b/docs/regressions-msmarco-v2-passage-unicoil-noexp-0shot.md
index 0a81582abe..5233b681e9 100644
--- a/docs/regressions-msmarco-v2-passage-unicoil-noexp-0shot.md
+++ b/docs/regressions-msmarco-v2-passage-unicoil-noexp-0shot.md
@@ -14,15 +14,26 @@ Note that this page is automatically generated from [this template](../src/main/
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage-unicoil-noexp-0shot
 ```
 
-## Corpus
+We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression msmarco-v2-passage-unicoil-noexp-0shot
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_passage_unicoil_noexp_0shot.tar -P collections/
 
@@ -34,12 +45,18 @@ mv collections/msmarco_v2_passage_unicoil_noexp_0shot collections/msmarco-v2-pas
 ```
 
 To confirm, `msmarco_v2_passage_unicoil_noexp_0shot.tar` is 24 GB and has an MD5 checksum of `d9cc1ed3049746e68a2c91bf90e5212d`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression msmarco-v2-passage-unicoil-noexp-0shot \
+  --corpus-path collections/msmarco-v2-passage-unicoil-noexp-0shot
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 target/appassembler/bin/IndexCollection \
   -collection JsonVectorCollection \
   -input /path/to/msmarco-v2-passage-unicoil-noexp-0shot \
@@ -63,7 +80,7 @@ These regression experiments use the [dev queries](../src/main/resources/topics-
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 target/appassembler/bin/SearchCollection \
   -index indexes/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot/ \
   -topics src/main/resources/topics-and-qrels/topics.msmarco-v2-passage.dev.unicoil-noexp.0shot.tsv.gz \
@@ -80,7 +97,7 @@ target/appassembler/bin/SearchCollection \
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.100 src/main/resources/topics-and-qrels/qrels.msmarco-v2-passage.dev.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.msmarco-v2-passage.dev.unicoil-noexp.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.1000 src/main/resources/topics-and-qrels/qrels.msmarco-v2-passage.dev.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.msmarco-v2-passage.dev.unicoil-noexp.0shot.txt
 tools/eval/trec_eval.9.0.4/trec_eval -c -M 100 -m map -c -M 100 -m recip_rank src/main/resources/topics-and-qrels/qrels.msmarco-v2-passage.dev.txt runs/run.msmarco-v2-passage-unicoil-noexp-0shot.unicoil-noexp-0shot.topics.msmarco-v2-passage.dev.unicoil-noexp.0shot.txt
diff --git a/src/main/python/run_regression.py b/src/main/python/run_regression.py
index e68d6f2c08..b7f4127cdc 100644
--- a/src/main/python/run_regression.py
+++ b/src/main/python/run_regression.py
@@ -16,16 +16,21 @@
 
 from __future__ import print_function
 
-import itertools
-import sys
-
 import argparse
+import hashlib
+import itertools
 import logging
 import os
-import yaml
+import re
+import stat
+import tarfile
+
 from multiprocessing import Pool
 from subprocess import call, Popen, PIPE
+from urllib.request import urlretrieve
 
+import yaml
+from tqdm import tqdm
 
 logger = logging.getLogger('regression_test')
 logger.setLevel(logging.INFO)
@@ -177,10 +182,77 @@ def run_search(cmd):
     call(' '.join(cmd), shell=True)
 
 
+# https://gist.github.com/leimao/37ff6e990b3226c2c9670a2cd1e4a6f5
+class TqdmUpTo(tqdm):
+    def update_to(self, b=1, bsize=1, tsize=None):
+        """Advance the bar to `b * bsize`; passed as the `reporthook` of `urlretrieve` in `download_url`.
+
+        :param b: number of blocks transferred so far (default: 1).
+        :param bsize: size of each block, in tqdm units (default: 1).
+        :param tsize: total size, in tqdm units; when None (the default) the
+            bar's current total is left unchanged.
+        """
+        if tsize is not None:
+            self.total = tsize
+        transferred = b * bsize
+        self.update(transferred - self.n)  # update() takes a delta; afterwards self.n == transferred
+
+
+# For large files, we need to compute MD5 block by block. See:
+# https://stackoverflow.com/questions/1131220/get-md5-hash-of-big-files-in-python
+def compute_md5(file, block_size=2**20):
+    """Return the hex MD5 digest of `file`, reading it `block_size` bytes at a time."""
+    digest = hashlib.md5()
+    with open(file, 'rb') as stream:
+        # Two-argument iter() keeps calling read() until it returns b'' (end of
+        # file), so only one block is held in memory at a time.
+        for chunk in iter(lambda: stream.read(block_size), b''):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def download_url(url, save_dir, local_filename=None, md5=None, force=False, verbose=True):
+    """Download `url` into `save_dir` and return the local path; an existing file is reused unless `force`.
+
+    The file is named `local_filename` if given, else the last URL segment minus a trailing Dropbox '?dl=1'.
+    Raises AssertionError if `md5` is given and a freshly downloaded file does not hash to it.
+    """
+    if local_filename:
+        filename = local_filename
+    else:
+        filename = re.sub('\\?dl=1$', '', url.split('/')[-1])
+
+    destination_path = os.path.join(save_dir, filename)
+
+    if verbose:
+        logger.info(f'Downloading {url} to {destination_path}...')
+
+    if os.path.exists(destination_path):
+        if verbose:
+            logger.info(f'{destination_path} already exists!')
+        if not force:
+            if verbose:
+                logger.info('Skipping download.')
+            return destination_path  # NB: a reused file is not re-checked against md5
+        if verbose:
+            logger.info(f'force=True, removing {destination_path}; fetching fresh copy...')
+        os.remove(destination_path)
+
+    with TqdmUpTo(unit='B', unit_scale=True, unit_divisor=1024, miniters=1, desc=filename) as t:
+        urlretrieve(url, filename=destination_path, reporthook=t.update_to)
+
+    if md5:
+        md5_computed = compute_md5(destination_path)
+        if md5_computed != md5:  # explicit raise: a bare `assert` is stripped under `python -O`
+            raise AssertionError(f'{destination_path} does not match checksum! Expecting {md5} got {md5_computed}.')
+    return destination_path
+
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Run Anserini regression tests.')
     parser.add_argument('--regression', required=True, help='Name of the regression test.')
     parser.add_argument('--corpus-path', dest='corpus_path', default='', help='Override corpus path from YAML')
+    parser.add_argument('--download', dest='download', action='store_true', help='Download and unpack the corpus.')
     parser.add_argument('--index', dest='index', action='store_true', help='Build index.')
     parser.add_argument('--index-threads', type=int, default=-1, help='Override number of indexing threads from YAML')
     parser.add_argument('--verify', dest='verify', action='store_true', help='Verify index statistics.')
@@ -194,6 +266,32 @@ def run_search(cmd):
     with open('src/main/resources/regression/{}.yaml'.format(args.regression)) as f:
         yaml_data = yaml.safe_load(f)
 
+    if args.download:
+        logger.info('='*10 + ' Downloading Corpus ' + '='*10)
+        # .get() so a YAML without download info raises the ValueError below, not a bare KeyError.
+        url = yaml_data.get('download_url')
+        if not url:
+            raise ValueError('Corpus download URL unknown!')
+        # download_url() returns the path it saved to; reuse it rather than re-deriving the name from the URL.
+        local_tarball = download_url(url, 'collections', md5=yaml_data['download_checksum'])
+
+        logger.info(f'Extracting {local_tarball}...')
+        with tarfile.open(local_tarball) as tarball:  # closes the archive even if extraction fails
+            tarball.extractall('collections')
+
+        # e.g., MS MARCO V2: need to rename the corpus
+        if 'download_corpus' in yaml_data:
+            src = os.path.join('collections', yaml_data['download_corpus'])
+            dest = os.path.join('collections', yaml_data['corpus'])
+            logger.info(f'Renaming {src} to {dest}')
+            # Set the extracted corpus to owner-only read/write/execute (0700) before renaming it.
+            os.chmod(src, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
+            os.rename(src, dest)
+
+        path = os.path.join('collections', yaml_data['corpus'])
+        logger.info(f'Corpus path is {path}')
+        args.corpus_path = path
+
     # Build indexes.
     if args.index:
         logger.info('='*10 + ' Indexing ' + '='*10)
diff --git a/src/main/resources/docgen/templates/dl19-doc-segmented-unicoil-noexp.template b/src/main/resources/docgen/templates/dl19-doc-segmented-unicoil-noexp.template
index 17b220afad..f17dc6a706 100644
--- a/src/main/resources/docgen/templates/dl19-doc-segmented-unicoil-noexp.template
+++ b/src/main/resources/docgen/templates/dl19-doc-segmented-unicoil-noexp.template
@@ -20,11 +20,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO segmented document corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -34,16 +41,13 @@ tar xvf collections/msmarco-doc-segmented-unicoil-noexp.tar -C collections/
 ```
 
 To confirm, `msmarco-doc-segmented-unicoil-noexp.tar` is 11 GB and has MD5 checksum `11b226e1cacd9c8ae0a660fd14cdd710`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
   --corpus-path collections/${corpus}
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/src/main/resources/docgen/templates/dl19-doc-segmented-unicoil.template b/src/main/resources/docgen/templates/dl19-doc-segmented-unicoil.template
index 2f8610d0e7..e2f27ced45 100644
--- a/src/main/resources/docgen/templates/dl19-doc-segmented-unicoil.template
+++ b/src/main/resources/docgen/templates/dl19-doc-segmented-unicoil.template
@@ -20,11 +20,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO segmented document corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -34,16 +41,13 @@ tar xvf collections/msmarco-doc-segmented-unicoil.tar -C collections/
 ```
 
 To confirm, `msmarco-doc-segmented-unicoil.tar` is 19 GB and has MD5 checksum `6a00e2c0c375cb1e52c83ae5ac377ebb`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
   --corpus-path collections/${corpus}
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/src/main/resources/docgen/templates/dl19-passage-unicoil-noexp.template b/src/main/resources/docgen/templates/dl19-passage-unicoil-noexp.template
index c012e6fb2d..4b13b3446b 100644
--- a/src/main/resources/docgen/templates/dl19-passage-unicoil-noexp.template
+++ b/src/main/resources/docgen/templates/dl19-passage-unicoil-noexp.template
@@ -22,11 +22,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -36,16 +43,13 @@ tar xvf collections/msmarco-passage-unicoil-noexp.tar -C collections/
 ```
 
 To confirm, `msmarco-passage-unicoil-noexp.tar` is 2.7 GB and has MD5 checksum `f17ddd8c7c00ff121c3c3b147d2e17d8`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
   --corpus-path collections/${corpus}
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/src/main/resources/docgen/templates/dl19-passage-unicoil.template b/src/main/resources/docgen/templates/dl19-passage-unicoil.template
index 987a022637..fd038c60fd 100644
--- a/src/main/resources/docgen/templates/dl19-passage-unicoil.template
+++ b/src/main/resources/docgen/templates/dl19-passage-unicoil.template
@@ -22,11 +22,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -36,16 +43,13 @@ tar xvf collections/msmarco-passage-unicoil.tar -C collections/
 ```
 
 To confirm, `msmarco-passage-unicoil.tar` is 3.4 GB and has MD5 checksum `78eef752c78c8691f7d61600ceed306f`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
   --corpus-path collections/${corpus}
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/src/main/resources/docgen/templates/dl20-doc-segmented-unicoil-noexp.template b/src/main/resources/docgen/templates/dl20-doc-segmented-unicoil-noexp.template
index 42cd87c4ac..22f4b3f2b6 100644
--- a/src/main/resources/docgen/templates/dl20-doc-segmented-unicoil-noexp.template
+++ b/src/main/resources/docgen/templates/dl20-doc-segmented-unicoil-noexp.template
@@ -20,11 +20,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO segmented document corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -34,16 +41,13 @@ tar xvf collections/msmarco-doc-segmented-unicoil-noexp.tar -C collections/
 ```
 
 To confirm, `msmarco-doc-segmented-unicoil-noexp.tar` is 11 GB and has MD5 checksum `11b226e1cacd9c8ae0a660fd14cdd710`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
   --corpus-path collections/${corpus}
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/src/main/resources/docgen/templates/dl20-doc-segmented-unicoil.template b/src/main/resources/docgen/templates/dl20-doc-segmented-unicoil.template
index 83b69beca4..bbd6489e7f 100644
--- a/src/main/resources/docgen/templates/dl20-doc-segmented-unicoil.template
+++ b/src/main/resources/docgen/templates/dl20-doc-segmented-unicoil.template
@@ -20,11 +20,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO segmented document corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -34,16 +41,13 @@ tar xvf collections/msmarco-doc-segmented-unicoil.tar -C collections/
 ```
 
 To confirm, `msmarco-doc-segmented-unicoil.tar` is 19 GB and has MD5 checksum `6a00e2c0c375cb1e52c83ae5ac377ebb`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
   --corpus-path collections/${corpus}
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/src/main/resources/docgen/templates/dl20-passage-unicoil-noexp.template b/src/main/resources/docgen/templates/dl20-passage-unicoil-noexp.template
index e38f7df879..2e2142acaa 100644
--- a/src/main/resources/docgen/templates/dl20-passage-unicoil-noexp.template
+++ b/src/main/resources/docgen/templates/dl20-passage-unicoil-noexp.template
@@ -22,11 +22,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -36,16 +43,13 @@ tar xvf collections/msmarco-passage-unicoil-noexp.tar -C collections/
 ```
 
 To confirm, `msmarco-passage-unicoil-noexp.tar` is 2.7 GB and has MD5 checksum `f17ddd8c7c00ff121c3c3b147d2e17d8`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
   --corpus-path collections/${corpus}
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/src/main/resources/docgen/templates/dl20-passage-unicoil.template b/src/main/resources/docgen/templates/dl20-passage-unicoil.template
index 9905eb9b38..9457f31e87 100644
--- a/src/main/resources/docgen/templates/dl20-passage-unicoil.template
+++ b/src/main/resources/docgen/templates/dl20-passage-unicoil.template
@@ -22,11 +22,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -36,16 +43,13 @@ tar xvf collections/msmarco-passage-unicoil.tar -C collections/
 ```
 
 To confirm, `msmarco-passage-unicoil.tar` is 3.4 GB and has MD5 checksum `78eef752c78c8691f7d61600ceed306f`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
   --corpus-path collections/${corpus}
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/src/main/resources/docgen/templates/dl21-doc-segmented-unicoil-0shot-v2.template b/src/main/resources/docgen/templates/dl21-doc-segmented-unicoil-0shot-v2.template
index 376c778c04..24db4231d8 100644
--- a/src/main/resources/docgen/templates/dl21-doc-segmented-unicoil-0shot-v2.template
+++ b/src/main/resources/docgen/templates/dl21-doc-segmented-unicoil-0shot-v2.template
@@ -23,15 +23,26 @@ Note that this page is automatically generated from [this template](${template})
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus
+We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_0shot_v2.tar -P collections/
 
@@ -43,16 +54,22 @@ mv collections/msmarco_v2_doc_segmented_unicoil_0shot_v2 collections/msmarco-v2-
 ```
 
 To confirm, `msmarco_v2_doc_segmented_unicoil_0shot_v2.tar` is 72 GB and has an MD5 checksum of `c5639748c2cbad0152e10b0ebde3b804`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 ${index_cmds}
 ```
 
-The path `/path/to/msmarco-v2-doc-segmented-unicoil-0shot/` should point to the corpus downloaded above.
+The path `/path/to/msmarco-v2-doc-segmented-unicoil-0shot-v2/` should point to the corpus downloaded above.
 
 The important indexing options to note here are `-impact -pretokenized`: the first tells Anserini not to encode BM25 doclengths into Lucene's norms (which is the default) and the second option says not to apply any additional tokenization on the uniCOIL tokens.
 Upon completion, we should have an index with 124,131,414 documents.
@@ -67,13 +84,13 @@ The original data can be found [here](https://trec.nist.gov/data/deep2021.html).
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 ${ranking_cmds}
 ```
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 ${eval_cmds}
 ```
 
diff --git a/src/main/resources/docgen/templates/dl21-doc-segmented-unicoil-0shot.template b/src/main/resources/docgen/templates/dl21-doc-segmented-unicoil-0shot.template
index 0bbd29928e..2b939bc0ff 100644
--- a/src/main/resources/docgen/templates/dl21-doc-segmented-unicoil-0shot.template
+++ b/src/main/resources/docgen/templates/dl21-doc-segmented-unicoil-0shot.template
@@ -22,15 +22,26 @@ Note that this page is automatically generated from [this template](${template})
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus
+We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_0shot.tar -P collections/
 
@@ -42,12 +53,18 @@ mv collections/msmarco_v2_doc_segmented_unicoil_0shot collections/msmarco-v2-doc
 ```
 
 To confirm, `msmarco_v2_doc_segmented_unicoil_0shot.tar` is 62 GB and has an MD5 checksum of `889db095113cc4fe152382ccff73304a`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 ${index_cmds}
 ```
 
@@ -66,13 +83,13 @@ The original data can be found [here](https://trec.nist.gov/data/deep2021.html).
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 ${ranking_cmds}
 ```
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 ${eval_cmds}
 ```
 
diff --git a/src/main/resources/docgen/templates/dl21-doc-segmented-unicoil-noexp-0shot-v2.template b/src/main/resources/docgen/templates/dl21-doc-segmented-unicoil-noexp-0shot-v2.template
index 25ca6a6111..f461cb4921 100644
--- a/src/main/resources/docgen/templates/dl21-doc-segmented-unicoil-noexp-0shot-v2.template
+++ b/src/main/resources/docgen/templates/dl21-doc-segmented-unicoil-noexp-0shot-v2.template
@@ -23,15 +23,26 @@ Note that this page is automatically generated from [this template](${template})
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus
+We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have performed model inference on every document and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar -P collections/
 
@@ -43,16 +54,22 @@ mv collections/msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2 collections/msmar
 ```
 
 To confirm, `msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar` is 55 GB and has an MD5 checksum of `97ba262c497164de1054f357caea0c63`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 ${index_cmds}
 ```
 
-The path `/path/to/msmarco-v2-doc-segmented-unicoil-noexp-0shot/` should point to the corpus downloaded above.
+The path `/path/to/msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2/` should point to the corpus downloaded above.
 
 The important indexing options to note here are `-impact -pretokenized`: the first tells Anserini not to encode BM25 doclengths into Lucene's norms (which is the default) and the second option says not to apply any additional tokenization on the uniCOIL tokens.
 Upon completion, we should have an index with 124,131,404 documents.
@@ -67,13 +84,13 @@ The original data can be found [here](https://trec.nist.gov/data/deep2021.html).
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 ${ranking_cmds}
 ```
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 ${eval_cmds}
 ```
 
diff --git a/src/main/resources/docgen/templates/dl21-doc-segmented-unicoil-noexp-0shot.template b/src/main/resources/docgen/templates/dl21-doc-segmented-unicoil-noexp-0shot.template
index 084e87e199..96f39124a2 100644
--- a/src/main/resources/docgen/templates/dl21-doc-segmented-unicoil-noexp-0shot.template
+++ b/src/main/resources/docgen/templates/dl21-doc-segmented-unicoil-noexp-0shot.template
@@ -22,15 +22,26 @@ Note that this page is automatically generated from [this template](${template})
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus
+We make available a version of the MS MARCO document corpus that has already been processed with uniCOIL (per above), i.e., we have performed model inference on every document and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar -P collections/
 
@@ -42,12 +53,18 @@ mv collections/msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2 collections/msmar
 ```
 
 To confirm, `msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar` is 55 GB and has an MD5 checksum of `97ba262c497164de1054f357caea0c63`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 ${index_cmds}
 ```
 
@@ -66,13 +83,13 @@ The original data can be found [here](https://trec.nist.gov/data/deep2021.html).
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 ${ranking_cmds}
 ```
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 ${eval_cmds}
 ```
 
diff --git a/src/main/resources/docgen/templates/dl21-passage-unicoil-0shot.template b/src/main/resources/docgen/templates/dl21-passage-unicoil-0shot.template
index 1b25a77d9a..f94e21ab2e 100644
--- a/src/main/resources/docgen/templates/dl21-passage-unicoil-0shot.template
+++ b/src/main/resources/docgen/templates/dl21-passage-unicoil-0shot.template
@@ -17,15 +17,26 @@ Note that this page is automatically generated from [this template](${template})
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus
+We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_passage_unicoil_0shot.tar -P collections/
 
@@ -37,12 +48,18 @@ mv collections/msmarco_v2_passage_unicoil_0shot collections/msmarco-v2-passage-u
 ```
 
 To confirm, `msmarco_v2_passage_unicoil_0shot.tar` is 41 GB and has an MD5 checksum of `1949a00bfd5e1f1a230a04bbc1f01539`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 ${index_cmds}
 ```
 
@@ -61,13 +78,13 @@ The original data can be found [here](https://trec.nist.gov/data/deep2021.html).
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 ${ranking_cmds}
 ```
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 ${eval_cmds}
 ```
 
diff --git a/src/main/resources/docgen/templates/dl21-passage-unicoil-noexp-0shot.template b/src/main/resources/docgen/templates/dl21-passage-unicoil-noexp-0shot.template
index 32c95aab3f..1aafe2dfa6 100644
--- a/src/main/resources/docgen/templates/dl21-passage-unicoil-noexp-0shot.template
+++ b/src/main/resources/docgen/templates/dl21-passage-unicoil-noexp-0shot.template
@@ -17,15 +17,26 @@ Note that this page is automatically generated from [this template](${template})
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus
+We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_passage_unicoil_noexp_0shot.tar -P collections/
 
@@ -37,6 +48,12 @@ mv collections/msmarco_v2_passage_unicoil_noexp_0shot collections/msmarco-v2-pas
 ```
 
 To confirm, `msmarco_v2_passage_unicoil_noexp_0shot.tar` is 24 GB and has an MD5 checksum of `d9cc1ed3049746e68a2c91bf90e5212d`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
 
 ## Indexing
 
diff --git a/src/main/resources/docgen/templates/msmarco-doc-segmented-unicoil-noexp.template b/src/main/resources/docgen/templates/msmarco-doc-segmented-unicoil-noexp.template
index 28231b6442..7b3c439d95 100644
--- a/src/main/resources/docgen/templates/msmarco-doc-segmented-unicoil-noexp.template
+++ b/src/main/resources/docgen/templates/msmarco-doc-segmented-unicoil-noexp.template
@@ -20,11 +20,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO segmented document corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO segmented document corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -34,16 +41,13 @@ tar xvf collections/msmarco-doc-segmented-unicoil-noexp.tar -C collections/
 ```
 
 To confirm, `msmarco-doc-segmented-unicoil-noexp.tar` is 11 GB and has MD5 checksum `11b226e1cacd9c8ae0a660fd14cdd710`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
   --corpus-path collections/${corpus}
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/src/main/resources/docgen/templates/msmarco-doc-segmented-unicoil.template b/src/main/resources/docgen/templates/msmarco-doc-segmented-unicoil.template
index 4a2885906b..166eb6e3d3 100644
--- a/src/main/resources/docgen/templates/msmarco-doc-segmented-unicoil.template
+++ b/src/main/resources/docgen/templates/msmarco-doc-segmented-unicoil.template
@@ -20,11 +20,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO segmented document corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO segmented document corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -34,16 +41,13 @@ tar xvf collections/msmarco-doc-segmented-unicoil.tar -C collections/
 ```
 
 To confirm, `msmarco-doc-segmented-unicoil.tar` is 19 GB and has MD5 checksum `6a00e2c0c375cb1e52c83ae5ac377ebb`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
   --corpus-path collections/${corpus}
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/src/main/resources/docgen/templates/msmarco-passage-unicoil-noexp.template b/src/main/resources/docgen/templates/msmarco-passage-unicoil-noexp.template
index 0374f46ac3..b57eccd4d5 100644
--- a/src/main/resources/docgen/templates/msmarco-passage-unicoil-noexp.template
+++ b/src/main/resources/docgen/templates/msmarco-passage-unicoil-noexp.template
@@ -19,11 +19,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -33,16 +40,13 @@ tar xvf collections/msmarco-passage-unicoil-noexp.tar -C collections/
 ```
 
 To confirm, `msmarco-passage-unicoil-noexp.tar` is 2.7 GB and has MD5 checksum `f17ddd8c7c00ff121c3c3b147d2e17d8`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
   --corpus-path collections/${corpus}
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/src/main/resources/docgen/templates/msmarco-passage-unicoil.template b/src/main/resources/docgen/templates/msmarco-passage-unicoil.template
index cd5cf4bf68..96f26c9b4e 100644
--- a/src/main/resources/docgen/templates/msmarco-passage-unicoil.template
+++ b/src/main/resources/docgen/templates/msmarco-passage-unicoil.template
@@ -16,11 +16,18 @@ From one of our Waterloo servers (e.g., `orca`), the following command will perf
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus Download
-
-We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., gone through document expansion and term reweighting.
+We make available a version of the MS MARCO passage corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
 Thus, no neural inference is involved.
-For details on how to train uniCOIL and perform inference, please see [this guide](https://github.com/luyug/COIL/tree/main/uniCOIL).
+
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
+
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
+```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
 
 Download the corpus and unpack into `collections/`:
 
@@ -30,16 +37,13 @@ tar xvf collections/msmarco-passage-unicoil.tar -C collections/
 ```
 
 To confirm, `msmarco-passage-unicoil.tar` is 3.4 GB and has MD5 checksum `78eef752c78c8691f7d61600ceed306f`.
-
-With the corpus downloaded, the following command will perform the complete regression, end to end, on any machine:
+With the corpus downloaded, the following command will perform the remaining steps below:
 
 ```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
   --corpus-path collections/${corpus}
 ```
 
-Alternatively, you can simply copy/paste from the commands below and obtain the same results.
-
 ## Indexing
 
 Sample indexing command:
diff --git a/src/main/resources/docgen/templates/msmarco-v2-doc-segmented-unicoil-0shot-v2.template b/src/main/resources/docgen/templates/msmarco-v2-doc-segmented-unicoil-0shot-v2.template
index 2b57ada9d7..89445a029e 100644
--- a/src/main/resources/docgen/templates/msmarco-v2-doc-segmented-unicoil-0shot-v2.template
+++ b/src/main/resources/docgen/templates/msmarco-v2-doc-segmented-unicoil-0shot-v2.template
@@ -20,15 +20,26 @@ Note that this page is automatically generated from [this template](${template})
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus
+We make available a version of the MS MARCO V2 segmented document corpus that has already been processed with uniCOIL (per above), i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_0shot_v2.tar -P collections/
 
@@ -40,12 +51,18 @@ mv collections/msmarco_v2_doc_segmented_unicoil_0shot_v2 collections/msmarco-v2-
 ```
 
 To confirm, `msmarco_v2_doc_segmented_unicoil_0shot_v2.tar` is 72 GB and has an MD5 checksum of `c5639748c2cbad0152e10b0ebde3b804`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 ${index_cmds}
 ```
 
@@ -63,13 +80,13 @@ These regression experiments use the [dev queries](../src/main/resources/topics-
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 ${ranking_cmds}
 ```
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 ${eval_cmds}
 ```
 
diff --git a/src/main/resources/docgen/templates/msmarco-v2-doc-segmented-unicoil-0shot.template b/src/main/resources/docgen/templates/msmarco-v2-doc-segmented-unicoil-0shot.template
index 1163eae014..a448d0ed42 100644
--- a/src/main/resources/docgen/templates/msmarco-v2-doc-segmented-unicoil-0shot.template
+++ b/src/main/resources/docgen/templates/msmarco-v2-doc-segmented-unicoil-0shot.template
@@ -19,15 +19,26 @@ Note that this page is automatically generated from [this template](${template})
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus
+We make available a version of the MS MARCO V2 segmented document corpus that has already been processed with uniCOIL (per above), i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_0shot.tar -P collections/
 
@@ -39,12 +50,18 @@ mv collections/msmarco_v2_doc_segmented_unicoil_0shot collections/msmarco-v2-doc
 ```
 
 To confirm, `msmarco_v2_doc_segmented_unicoil_0shot.tar` is 62 GB and has an MD5 checksum of `889db095113cc4fe152382ccff73304a`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 ${index_cmds}
 ```
 
@@ -62,13 +79,13 @@ These regression experiments use the [dev queries](../src/main/resources/topics-
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 ${ranking_cmds}
 ```
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 ${eval_cmds}
 ```
 
diff --git a/src/main/resources/docgen/templates/msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.template b/src/main/resources/docgen/templates/msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.template
index 2e902baf47..013347cdb8 100644
--- a/src/main/resources/docgen/templates/msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.template
+++ b/src/main/resources/docgen/templates/msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.template
@@ -20,15 +20,26 @@ Note that this page is automatically generated from [this template](${template})
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus
+We make available a version of the MS MARCO V2 segmented document corpus that has already been processed with uniCOIL (per above), i.e., we have performed model inference on every document and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar -P collections/
 
@@ -40,12 +51,18 @@ mv collections/msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2 collections/msmar
 ```
 
 To confirm, `msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar` is 55 GB and has an MD5 checksum of `97ba262c497164de1054f357caea0c63`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 ${index_cmds}
 ```
 
@@ -63,13 +80,13 @@ These regression experiments use the [dev queries](../src/main/resources/topics-
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 ${ranking_cmds}
 ```
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 ${eval_cmds}
 ```
 
diff --git a/src/main/resources/docgen/templates/msmarco-v2-doc-segmented-unicoil-noexp-0shot.template b/src/main/resources/docgen/templates/msmarco-v2-doc-segmented-unicoil-noexp-0shot.template
index 32709bbaeb..03699a3cc8 100644
--- a/src/main/resources/docgen/templates/msmarco-v2-doc-segmented-unicoil-noexp-0shot.template
+++ b/src/main/resources/docgen/templates/msmarco-v2-doc-segmented-unicoil-noexp-0shot.template
@@ -19,15 +19,26 @@ Note that this page is automatically generated from [this template](${template})
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus
+We make available a version of the MS MARCO V2 segmented document corpus that has already been processed with uniCOIL (per above), i.e., we have performed model inference on every document and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_noexp_0shot.tar -P collections/
 
@@ -39,12 +50,18 @@ mv collections/msmarco_v2_doc_segmented_unicoil_noexp_0shot collections/msmarco-
 ```
 
 To confirm, `msmarco_v2_doc_segmented_unicoil_noexp_0shot.tar` is 54 GB and has an MD5 checksum of `28261587d6afde56efd8df4f950e7fb4`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 ${index_cmds}
 ```
 
@@ -62,13 +79,13 @@ These regression experiments use the [dev queries](../src/main/resources/topics-
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 ${ranking_cmds}
 ```
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 ${eval_cmds}
 ```
 
diff --git a/src/main/resources/docgen/templates/msmarco-v2-passage-unicoil-0shot.template b/src/main/resources/docgen/templates/msmarco-v2-passage-unicoil-0shot.template
index 8d91c6d09b..a5e456ede1 100644
--- a/src/main/resources/docgen/templates/msmarco-v2-passage-unicoil-0shot.template
+++ b/src/main/resources/docgen/templates/msmarco-v2-passage-unicoil-0shot.template
@@ -14,15 +14,26 @@ Note that this page is automatically generated from [this template](${template})
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus
+We make available a version of the MS MARCO V2 passage corpus that has already been processed with uniCOIL, i.e., we have applied doc2query-T5 expansions, performed model inference on every document, and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_passage_unicoil_0shot.tar -P collections/
 
@@ -34,12 +45,18 @@ mv collections/msmarco_v2_passage_unicoil_0shot collections/msmarco-v2-passage-u
 ```
 
 To confirm, `msmarco_v2_passage_unicoil_0shot.tar` is 41 GB and has an MD5 checksum of `1949a00bfd5e1f1a230a04bbc1f01539`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 ${index_cmds}
 ```
 
@@ -57,13 +74,13 @@ These regression experiments use the [dev queries](../src/main/resources/topics-
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 ${ranking_cmds}
 ```
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 ${eval_cmds}
 ```
 
diff --git a/src/main/resources/docgen/templates/msmarco-v2-passage-unicoil-noexp-0shot.template b/src/main/resources/docgen/templates/msmarco-v2-passage-unicoil-noexp-0shot.template
index 1adb7015ce..24b7afd12b 100644
--- a/src/main/resources/docgen/templates/msmarco-v2-passage-unicoil-noexp-0shot.template
+++ b/src/main/resources/docgen/templates/msmarco-v2-passage-unicoil-noexp-0shot.template
@@ -14,15 +14,26 @@ Note that this page is automatically generated from [this template](${template})
 
 From one of our Waterloo servers (e.g., `orca`), the following command will perform the complete regression, end to end:
 
-```
+```bash
 python src/main/python/run_regression.py --index --verify --search --regression ${test_name}
 ```
 
-## Corpus
+We make available a version of the MS MARCO V2 passage corpus that has already been processed with uniCOIL, i.e., we have performed model inference on every document and stored the output sparse vectors.
+Thus, no neural inference is involved.
 
-Download, unpack, and prepare the corpus:
+From any machine, the following command will download the corpus and perform the complete regression, end to end:
 
+```bash
+python src/main/python/run_regression.py --download --index --verify --search --regression ${test_name}
 ```
+
+The `run_regression.py` script automates the following steps, but if you want to perform each step manually, simply copy/paste from the commands below and you'll obtain the same regression results.
+
+## Corpus Download
+
+Download, unpack, and prepare the corpus:
+
+```bash
 # Download
 wget https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_passage_unicoil_noexp_0shot.tar -P collections/
 
@@ -34,12 +45,18 @@ mv collections/msmarco_v2_passage_unicoil_noexp_0shot collections/msmarco-v2-pas
 ```
 
 To confirm, `msmarco_v2_passage_unicoil_noexp_0shot.tar` is 24 GB and has an MD5 checksum of `d9cc1ed3049746e68a2c91bf90e5212d`.
+With the corpus downloaded, the following command will perform the remaining steps below:
+
+```bash
+python src/main/python/run_regression.py --index --verify --search --regression ${test_name} \
+  --corpus-path collections/${corpus}
+```
 
 ## Indexing
 
 Sample indexing command:
 
-```
+```bash
 ${index_cmds}
 ```
 
@@ -57,13 +74,13 @@ These regression experiments use the [dev queries](../src/main/resources/topics-
 
 After indexing has completed, you should be able to perform retrieval as follows:
 
-```
+```bash
 ${ranking_cmds}
 ```
 
 Evaluation can be performed using `trec_eval`:
 
-```
+```bash
 ${eval_cmds}
 ```
 
diff --git a/src/main/resources/regression/dl19-doc-segmented-unicoil-noexp.yaml b/src/main/resources/regression/dl19-doc-segmented-unicoil-noexp.yaml
index 017ce91548..e24df19f20 100644
--- a/src/main/resources/regression/dl19-doc-segmented-unicoil-noexp.yaml
+++ b/src/main/resources/regression/dl19-doc-segmented-unicoil-noexp.yaml
@@ -2,6 +2,9 @@
 corpus: msmarco-doc-segmented-unicoil-noexp
 corpus_path: collections/msmarco/msmarco-doc-segmented-unicoil-noexp/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-doc-segmented-unicoil-noexp.tar
+download_checksum: 11b226e1cacd9c8ae0a660fd14cdd710
+
 index_path: indexes/lucene-index.msmarco-doc-segmented-unicoil-noexp/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/dl19-doc-segmented-unicoil.yaml b/src/main/resources/regression/dl19-doc-segmented-unicoil.yaml
index c526ed22bc..12088c713f 100644
--- a/src/main/resources/regression/dl19-doc-segmented-unicoil.yaml
+++ b/src/main/resources/regression/dl19-doc-segmented-unicoil.yaml
@@ -2,6 +2,9 @@
 corpus: msmarco-doc-segmented-unicoil
 corpus_path: collections/msmarco/msmarco-doc-segmented-unicoil/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-doc-segmented-unicoil.tar
+download_checksum: 6a00e2c0c375cb1e52c83ae5ac377ebb
+
 index_path: indexes/lucene-index.msmarco-doc-segmented-unicoil/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/dl19-passage-unicoil-noexp.yaml b/src/main/resources/regression/dl19-passage-unicoil-noexp.yaml
index 870c3d20cc..9e16a051fa 100644
--- a/src/main/resources/regression/dl19-passage-unicoil-noexp.yaml
+++ b/src/main/resources/regression/dl19-passage-unicoil-noexp.yaml
@@ -2,6 +2,9 @@
 corpus: msmarco-passage-unicoil-noexp
 corpus_path: collections/msmarco/msmarco-passage-unicoil-noexp/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-passage-unicoil-noexp.tar
+download_checksum: f17ddd8c7c00ff121c3c3b147d2e17d8
+
 index_path: indexes/lucene-index.msmarco-passage-unicoil-noexp/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/dl19-passage-unicoil.yaml b/src/main/resources/regression/dl19-passage-unicoil.yaml
index de19dc6ce2..ffadde3e52 100644
--- a/src/main/resources/regression/dl19-passage-unicoil.yaml
+++ b/src/main/resources/regression/dl19-passage-unicoil.yaml
@@ -2,6 +2,9 @@
 corpus: msmarco-passage-unicoil
 corpus_path: collections/msmarco/msmarco-passage-unicoil/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-passage-unicoil.tar
+download_checksum: 78eef752c78c8691f7d61600ceed306f
+
 index_path: indexes/lucene-index.msmarco-passage-unicoil/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/dl20-doc-segmented-unicoil-noexp.yaml b/src/main/resources/regression/dl20-doc-segmented-unicoil-noexp.yaml
index adf0aeacf1..995d552c05 100644
--- a/src/main/resources/regression/dl20-doc-segmented-unicoil-noexp.yaml
+++ b/src/main/resources/regression/dl20-doc-segmented-unicoil-noexp.yaml
@@ -2,6 +2,9 @@
 corpus: msmarco-doc-segmented-unicoil-noexp
 corpus_path: collections/msmarco/msmarco-doc-segmented-unicoil-noexp/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-doc-segmented-unicoil-noexp.tar
+download_checksum: 11b226e1cacd9c8ae0a660fd14cdd710
+
 index_path: indexes/lucene-index.msmarco-doc-segmented-unicoil-noexp/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/dl20-doc-segmented-unicoil.yaml b/src/main/resources/regression/dl20-doc-segmented-unicoil.yaml
index 306ea01f2d..7e6328312d 100644
--- a/src/main/resources/regression/dl20-doc-segmented-unicoil.yaml
+++ b/src/main/resources/regression/dl20-doc-segmented-unicoil.yaml
@@ -2,6 +2,9 @@
 corpus: msmarco-doc-segmented-unicoil
 corpus_path: collections/msmarco/msmarco-doc-segmented-unicoil/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-doc-segmented-unicoil.tar
+download_checksum: 6a00e2c0c375cb1e52c83ae5ac377ebb
+
 index_path: indexes/lucene-index.msmarco-doc-segmented-unicoil/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/dl20-passage-unicoil-noexp.yaml b/src/main/resources/regression/dl20-passage-unicoil-noexp.yaml
index 99e7b75162..bee354aa32 100644
--- a/src/main/resources/regression/dl20-passage-unicoil-noexp.yaml
+++ b/src/main/resources/regression/dl20-passage-unicoil-noexp.yaml
@@ -2,6 +2,9 @@
 corpus: msmarco-passage-unicoil-noexp
 corpus_path: collections/msmarco/msmarco-passage-unicoil-noexp/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-passage-unicoil-noexp.tar
+download_checksum: f17ddd8c7c00ff121c3c3b147d2e17d8
+
 index_path: indexes/lucene-index.msmarco-passage-unicoil-noexp/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/dl20-passage-unicoil.yaml b/src/main/resources/regression/dl20-passage-unicoil.yaml
index e4fe7ccf01..6abd02df96 100644
--- a/src/main/resources/regression/dl20-passage-unicoil.yaml
+++ b/src/main/resources/regression/dl20-passage-unicoil.yaml
@@ -2,6 +2,9 @@
 corpus: msmarco-passage-unicoil
 corpus_path: collections/msmarco/msmarco-passage-unicoil/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-passage-unicoil.tar
+download_checksum: 78eef752c78c8691f7d61600ceed306f
+
 index_path: indexes/lucene-index.msmarco-passage-unicoil/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/dl21-doc-segmented-unicoil-0shot-v2.yaml b/src/main/resources/regression/dl21-doc-segmented-unicoil-0shot-v2.yaml
index 3ac6fb99e5..1e9b4465a4 100644
--- a/src/main/resources/regression/dl21-doc-segmented-unicoil-0shot-v2.yaml
+++ b/src/main/resources/regression/dl21-doc-segmented-unicoil-0shot-v2.yaml
@@ -2,6 +2,10 @@
 corpus: msmarco-v2-doc-segmented-unicoil-0shot-v2
 corpus_path: collections/msmarco/msmarco_v2_doc_segmented_unicoil_0shot_v2/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_0shot_v2.tar
+download_checksum: c5639748c2cbad0152e10b0ebde3b804
+download_corpus: msmarco_v2_doc_segmented_unicoil_0shot_v2
+
 index_path: indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot-v2/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/dl21-doc-segmented-unicoil-0shot.yaml b/src/main/resources/regression/dl21-doc-segmented-unicoil-0shot.yaml
index fc258cae15..614b4c6440 100644
--- a/src/main/resources/regression/dl21-doc-segmented-unicoil-0shot.yaml
+++ b/src/main/resources/regression/dl21-doc-segmented-unicoil-0shot.yaml
@@ -2,6 +2,10 @@
 corpus: msmarco-v2-doc-segmented-unicoil-0shot
 corpus_path: collections/msmarco/msmarco_v2_doc_segmented_unicoil_0shot/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_0shot.tar
+download_checksum: 889db095113cc4fe152382ccff73304a
+download_corpus: msmarco_v2_doc_segmented_unicoil_0shot
+
 index_path: indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/dl21-doc-segmented-unicoil-noexp-0shot-v2.yaml b/src/main/resources/regression/dl21-doc-segmented-unicoil-noexp-0shot-v2.yaml
index 793fab46f4..092c2d1782 100644
--- a/src/main/resources/regression/dl21-doc-segmented-unicoil-noexp-0shot-v2.yaml
+++ b/src/main/resources/regression/dl21-doc-segmented-unicoil-noexp-0shot-v2.yaml
@@ -2,6 +2,10 @@
 corpus: msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2
 corpus_path: collections/msmarco/msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar
+download_checksum: 97ba262c497164de1054f357caea0c63
+download_corpus: msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2
+
 index_path: indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/dl21-doc-segmented-unicoil-noexp-0shot.yaml b/src/main/resources/regression/dl21-doc-segmented-unicoil-noexp-0shot.yaml
index 3a7a4953b9..0bb953ff4d 100644
--- a/src/main/resources/regression/dl21-doc-segmented-unicoil-noexp-0shot.yaml
+++ b/src/main/resources/regression/dl21-doc-segmented-unicoil-noexp-0shot.yaml
@@ -2,6 +2,10 @@
 corpus: msmarco-v2-doc-segmented-unicoil-noexp-0shot
 corpus_path: collections/msmarco/msmarco_v2_doc_segmented_unicoil_noexp_0shot/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_noexp_0shot.tar
+download_checksum: 28261587d6afde56efd8df4f950e7fb4
+download_corpus: msmarco_v2_doc_segmented_unicoil_noexp_0shot
+
 index_path: indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/dl21-passage-unicoil-0shot.yaml b/src/main/resources/regression/dl21-passage-unicoil-0shot.yaml
index 8980b6debd..e46b4f1c1a 100644
--- a/src/main/resources/regression/dl21-passage-unicoil-0shot.yaml
+++ b/src/main/resources/regression/dl21-passage-unicoil-0shot.yaml
@@ -2,6 +2,10 @@
 corpus: msmarco-v2-passage-unicoil-0shot
 corpus_path: collections/msmarco/msmarco_v2_passage_unicoil_0shot/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_passage_unicoil_0shot.tar
+download_checksum: 1949a00bfd5e1f1a230a04bbc1f01539
+download_corpus: msmarco_v2_passage_unicoil_0shot
+
 index_path: indexes/lucene-index.msmarco-v2-passage-unicoil-0shot/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/dl21-passage-unicoil-noexp-0shot.yaml b/src/main/resources/regression/dl21-passage-unicoil-noexp-0shot.yaml
index e2e3f151c5..bf0c8c7939 100644
--- a/src/main/resources/regression/dl21-passage-unicoil-noexp-0shot.yaml
+++ b/src/main/resources/regression/dl21-passage-unicoil-noexp-0shot.yaml
@@ -2,6 +2,10 @@
 corpus: msmarco-v2-passage-unicoil-noexp-0shot
 corpus_path: collections/msmarco/msmarco_v2_passage_unicoil_noexp_0shot/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_passage_unicoil_noexp_0shot.tar
+download_checksum: d9cc1ed3049746e68a2c91bf90e5212d
+download_corpus: msmarco_v2_passage_unicoil_noexp_0shot
+
 index_path: indexes/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/msmarco-doc-segmented-unicoil-noexp.yaml b/src/main/resources/regression/msmarco-doc-segmented-unicoil-noexp.yaml
index 47f8989d78..320f9e9c46 100644
--- a/src/main/resources/regression/msmarco-doc-segmented-unicoil-noexp.yaml
+++ b/src/main/resources/regression/msmarco-doc-segmented-unicoil-noexp.yaml
@@ -2,6 +2,9 @@
 corpus: msmarco-doc-segmented-unicoil-noexp
 corpus_path: collections/msmarco/msmarco-doc-segmented-unicoil-noexp/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-doc-segmented-unicoil-noexp.tar
+download_checksum: 11b226e1cacd9c8ae0a660fd14cdd710
+
 index_path: indexes/lucene-index.msmarco-doc-segmented-unicoil-noexp/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/msmarco-doc-segmented-unicoil.yaml b/src/main/resources/regression/msmarco-doc-segmented-unicoil.yaml
index a09adedab5..18895c13b4 100644
--- a/src/main/resources/regression/msmarco-doc-segmented-unicoil.yaml
+++ b/src/main/resources/regression/msmarco-doc-segmented-unicoil.yaml
@@ -2,6 +2,9 @@
 corpus: msmarco-doc-segmented-unicoil
 corpus_path: collections/msmarco/msmarco-doc-segmented-unicoil/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-doc-segmented-unicoil.tar
+download_checksum: 6a00e2c0c375cb1e52c83ae5ac377ebb
+
 index_path: indexes/lucene-index.msmarco-doc-segmented-unicoil/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/msmarco-passage-unicoil-noexp.yaml b/src/main/resources/regression/msmarco-passage-unicoil-noexp.yaml
index a764e01e55..2798dbb1c7 100644
--- a/src/main/resources/regression/msmarco-passage-unicoil-noexp.yaml
+++ b/src/main/resources/regression/msmarco-passage-unicoil-noexp.yaml
@@ -2,6 +2,9 @@
 corpus: msmarco-passage-unicoil-noexp
 corpus_path: collections/msmarco/msmarco-passage-unicoil-noexp/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-passage-unicoil-noexp.tar
+download_checksum: f17ddd8c7c00ff121c3c3b147d2e17d8
+
 index_path: indexes/lucene-index.msmarco-passage-unicoil-noexp/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/msmarco-passage-unicoil.yaml b/src/main/resources/regression/msmarco-passage-unicoil.yaml
index 06b7c025db..e5fda2f78f 100644
--- a/src/main/resources/regression/msmarco-passage-unicoil.yaml
+++ b/src/main/resources/regression/msmarco-passage-unicoil.yaml
@@ -2,6 +2,9 @@
 corpus: msmarco-passage-unicoil
 corpus_path: collections/msmarco/msmarco-passage-unicoil/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco-passage-unicoil.tar
+download_checksum: 78eef752c78c8691f7d61600ceed306f
+
 index_path: indexes/lucene-index.msmarco-passage-unicoil/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-0shot-v2.yaml b/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-0shot-v2.yaml
index e6a8728f30..ab00af36b9 100644
--- a/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-0shot-v2.yaml
+++ b/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-0shot-v2.yaml
@@ -2,6 +2,10 @@
 corpus: msmarco-v2-doc-segmented-unicoil-0shot-v2
 corpus_path: collections/msmarco/msmarco_v2_doc_segmented_unicoil_0shot_v2/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_0shot_v2.tar
+download_checksum: c5639748c2cbad0152e10b0ebde3b804
+download_corpus: msmarco_v2_doc_segmented_unicoil_0shot_v2
+
 index_path: indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot-v2/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-0shot.yaml b/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-0shot.yaml
index 0c46e799b4..8e4e71e603 100644
--- a/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-0shot.yaml
+++ b/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-0shot.yaml
@@ -2,6 +2,10 @@
 corpus: msmarco-v2-doc-segmented-unicoil-0shot
 corpus_path: collections/msmarco/msmarco_v2_doc_segmented_unicoil_0shot/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_0shot.tar
+download_checksum: 889db095113cc4fe152382ccff73304a
+download_corpus: msmarco_v2_doc_segmented_unicoil_0shot
+
 index_path: indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-0shot/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.yaml b/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.yaml
index 3a2aab6e98..86e623a7ed 100644
--- a/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.yaml
+++ b/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2.yaml
@@ -2,6 +2,10 @@
 corpus: msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2
 corpus_path: collections/msmarco/msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2.tar
+download_checksum: 97ba262c497164de1054f357caea0c63
+download_corpus: msmarco_v2_doc_segmented_unicoil_noexp_0shot_v2
+
 index_path: indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot-v2/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-noexp-0shot.yaml b/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-noexp-0shot.yaml
index fb20eba3d2..e2bf5d87eb 100644
--- a/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-noexp-0shot.yaml
+++ b/src/main/resources/regression/msmarco-v2-doc-segmented-unicoil-noexp-0shot.yaml
@@ -2,6 +2,10 @@
 corpus: msmarco-v2-doc-segmented-unicoil-noexp-0shot
 corpus_path: collections/msmarco/msmarco_v2_doc_segmented_unicoil_noexp_0shot/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_doc_segmented_unicoil_noexp_0shot.tar
+download_checksum: 28261587d6afde56efd8df4f950e7fb4
+download_corpus: msmarco_v2_doc_segmented_unicoil_noexp_0shot
+
 index_path: indexes/lucene-index.msmarco-v2-doc-segmented-unicoil-noexp-0shot/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/msmarco-v2-passage-unicoil-0shot.yaml b/src/main/resources/regression/msmarco-v2-passage-unicoil-0shot.yaml
index 44eae58b22..fc15f863cc 100644
--- a/src/main/resources/regression/msmarco-v2-passage-unicoil-0shot.yaml
+++ b/src/main/resources/regression/msmarco-v2-passage-unicoil-0shot.yaml
@@ -2,6 +2,10 @@
 corpus: msmarco-v2-passage-unicoil-0shot
 corpus_path: collections/msmarco/msmarco_v2_passage_unicoil_0shot/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_passage_unicoil_0shot.tar
+download_checksum: 1949a00bfd5e1f1a230a04bbc1f01539
+download_corpus: msmarco_v2_passage_unicoil_0shot
+
 index_path: indexes/lucene-index.msmarco-v2-passage-unicoil-0shot/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/main/resources/regression/msmarco-v2-passage-unicoil-noexp-0shot.yaml b/src/main/resources/regression/msmarco-v2-passage-unicoil-noexp-0shot.yaml
index 8218eacf36..04da480dc0 100644
--- a/src/main/resources/regression/msmarco-v2-passage-unicoil-noexp-0shot.yaml
+++ b/src/main/resources/regression/msmarco-v2-passage-unicoil-noexp-0shot.yaml
@@ -2,6 +2,10 @@
 corpus: msmarco-v2-passage-unicoil-noexp-0shot
 corpus_path: collections/msmarco/msmarco_v2_passage_unicoil_noexp_0shot/
 
+download_url: https://rgw.cs.uwaterloo.ca/JIMMYLIN-bucket0/data/msmarco_v2_passage_unicoil_noexp_0shot.tar
+download_checksum: d9cc1ed3049746e68a2c91bf90e5212d
+download_corpus: msmarco_v2_passage_unicoil_noexp_0shot
+
 index_path: indexes/lucene-index.msmarco-v2-passage-unicoil-noexp-0shot/
 collection_class: JsonVectorCollection
 generator_class: DefaultLuceneDocumentGenerator
diff --git a/src/test/java/io/anserini/doc/DataModel.java b/src/test/java/io/anserini/doc/DataModel.java
index 99a91213f3..eede3a99ed 100755
--- a/src/test/java/io/anserini/doc/DataModel.java
+++ b/src/test/java/io/anserini/doc/DataModel.java
@@ -47,6 +47,34 @@ public void setCorpus_path(String corpus_path) {
     this.corpus_path = corpus_path;
   }
 
+  private String download_url;  // Location of the downloadable corpus archive; presumably bound from the regression YAML key of the same name (a .tar URL there) - verify against the loader.
+  private String download_checksum;  // Checksum string for that archive; the YAML values are 32 hex characters (looks like MD5 - TODO confirm algorithm).
+  private String download_corpus;  // Corpus name for the download; in the regression YAML configs it equals the archive's base name without ".tar".
+
+  public String getDownload_url() {  // Plain accessor: returns the stored download_url unchanged (null if it was never set).
+    return download_url;
+  }
+
+  public void setDownload_url(String download_url) {  // Plain mutator: stores the given value as-is, with no validation of the URL.
+    this.download_url = download_url;
+  }
+
+  public String getDownload_checksum() {  // Plain accessor: returns the stored download_checksum unchanged (null if it was never set).
+    return download_checksum;
+  }
+
+  public void setDownload_checksum(String download_checksum) {  // Plain mutator: stores the given value as-is; the checksum format is not checked here.
+    this.download_checksum = download_checksum;
+  }
+
+  public String getDownload_corpus() {  // Plain accessor: returns the stored download_corpus unchanged (null if it was never set).
+    return download_corpus;
+  }
+
+  public void setDownload_corpus(String download_corpus) {  // Plain mutator: stores the given value as-is, with no validation.
+    this.download_corpus = download_corpus;
+  }
+
   private String index_path;
   private String collection_class;
   private String generator_class;