nasa · asteiker · Mar 8, 2021 · Feb 5, 2021 · Feb 10, 2021 · Feb 11, 2021
diff --git a/.gitignore b/.gitignore
@@ -6,3 +6,5 @@ output/*
 .env
 .deployenv
 .identity
+.netrc
+
diff --git a/README.md b/README.md
@@ -2,58 +2,112 @@
 
 # Running the Tests
 
+Each test suite is run in a separate Docker container using a temporary image built at test time.
+`conda` is used for dependency management. The two steps for each test suite are building and
+running the associated image.
+
 ## Install Prerequisites
 
 * [Docker](https://www.docker.com/get-started)
 
-## Build the Image & Run the Container
+## Build the Images
 
     $ cd test
-    $ make image
-    $ make run
-
-By default this will run the tests against the UAT environment. To run
-against a specific environment:
-
-    $ make run environment=prod
-
-Valid environment values are: sbx, sit, uat, prod.
-
-# Notebook Development
+    $ make images
 
-**Note** - this section applies to the contents of the `test` directory
+`make -j images` can be used to build the images in parallel (faster), although this may make
+Docker Desktop unstable.
 
-These prerequisites and steps are only needed if you want to do local
-development on the project. 
+## Create Terraform Autovars File
+In the `terraform` directory create a file called `key.auto.tfvars` and
+add a single line indicating the name of the ssh public key file that
+should be used for the EC2 instance that runs the notebooks.
 
-## Prerequisites
+This file name is the name of the S3 file created in the Harmony ssh key bucket as described in the Harmony project README.md.
 
-* [pyenv](https://github.com/pyenv/pyenv)
-* [pyenv-virtualenv](https://github.com/pyenv/pyenv-virtualenv)
-* [poetry](https://python-poetry.org/)
+Example:
+```
+key_name = "harmony-sit-my-key-name"
+```
 
-## Install Python 3.8 (if needed)
+## Run the notebooks
 
-    $ pyenv install 3.8.5
+    $ cd test
+    $ export HARMONY_HOST_URL=<url of Harmony in the target environment>
+    $ ./run_notebooks.sh
 
-## Install dependencies
+Outputs will be in the `output` directory. 
+`HARMONY_HOST_URL` for SIT would be `https://harmony.sit.earthdata.nasa.gov`
 
-    $ pyenv virtualenv 3.8.5 harmony-rt
-    $ pyenv local harmony-rt
-    $ pyenv activate harmony-rt
-    $ poetry install
-    $ pyenv rehash
+# Running the Tests in AWS
+First create a `.env` file in the top-level directory by copying the `dot_env` file and filling
+in the proper values. Then execute the following:
 
-## Run the notebooks
+    $ cd script
+    $ export HARMONY_ENVIRONMENT=<uat|sit|sandbox|prod>
+    $ ./test.sh
 
-    $ ./run_notebooks harmony_host_url=<url of Harmony in the target environment>
-
-e.g., 
-
-    $ ./run_notebooks harmony_host_url="https://harmony.sit.earthdata.nasa.gov"
+Output will be written to the bucket specified by the `REGRESSION_TEST_OUTPUT_BUCKET` environment
+variable, with a folder for each notebook.
 
-Outputs will be in the `output` directory
-
-## Start JupyterLab
+# Notebook Development
 
-    $ jupyter-lab
+Notebooks and support files should be placed in a subdirectory of the `test` directory.
+
+For example, in the `harmony` directory we have
+
+```
+├── Harmony.ipynb
+├── __init__.py
+├── environment.yaml
+└── util.py
+```
+
+Notebook dependencies should be listed in a file named `environment.yaml` at the top level of the
+subdirectory. The `name` field in the file should be `papermill`. For example:
+
+```yaml
+name: papermill
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - python=3.7
+  - jupyter
+  - requests
+  - netcdf4
+  - matplotlib
+  - papermill
+  - pytest
+  - ipytest
+```
+
+## Generating a Dependency Lockfile
+To increase runtime efficiency, the build relies on [conda-lock](https://pypi.org/project/conda-lock/). This is used to create a dependency lockfile that can be used 
+by conda to more efficiently load dependencies. The Docker build expects a lockfile
+named `conda-linux-64.lock` to exist at the top level of a notebook directory (next to
+the `environment.yaml` file).
+
+To build the lockfile, install `conda-lock` by following the directions provided on its website. Then generate the lockfile for your notebook by running the following:
+```
+conda-lock -f environment.yaml -p linux-64
+```
+
+Test notebooks should not rely on other forms of dependency management or expect user input.
+They _should_ utilize the `harmony_host_url` global variable to communicate with Harmony
+or to determine the Harmony environment. This variable is set by `papermill`; see
+`Harmony.ipynb` for an example of how to use it. More information can be found
+in the [papermill](https://papermill.readthedocs.io/en/latest/usage-parameterize.html)
+documentation on setting parameters.
+
+New test suites must be added to the `Makefile`. A new `name-image` target (where name is the name of
+the test suite) should be added (see the `harmony-image` example), and the new image target
+should be added as a dependency of the `images` target. The docker image should have a name like
+`harmony/regression-tests-<base_name>`, where `base_name` is the name of the test suite. 
+
+Finally, add the image base name to the `images` array on line 6 of the `run_notebooks.sh` file.
+For instance, if the image is named `harmony/regression-tests-foo`, then we would add `foo` to the
+array.
+
+The `run_notebooks.sh` file can be used as described above to run the test suite. Notebooks are
+expected to exit with a non-zero exit code on failure when run from `papermill`.
diff --git a/dot_env b/dot_env
@@ -0,0 +1,6 @@
+REGRESSION_TEST_OUTPUT_BUCKET=<some bucket like harmony-sit-regression-tests>
+AWS_ACCESS_KEY_ID=<KEY ID>
+AWS_SECRET_ACCESS_KEY=<SECRET KEY>
+EDL_USER=harmony_dev_user
+EDL_PASSWORD=<HARMONY DEV USER PASSWORD>
+SECRET_KEY_FILE=<path to unencrypted (no passphrase) private key file>
diff --git a/script/deploy-from-docker.sh b/script/deploy-from-docker.sh
@@ -30,11 +30,15 @@ function retry {
   return 0
 }
 
+# copy the test directory to the EC2 instance
 retry 5 scp -v -F sshconfig -i .identity -r test "ec2-user@${INSTANCE_ID}:"
+# create a .netrc file on the EC2 instance
+netrc_default="machine urs.earthdata.nasa.gov login ${EDL_USER} password ${EDL_PASSWORD}\nmachine uat.urs.earthdata.nasa.gov login ${EDL_USER} password ${EDL_PASSWORD}"
+retry 5 ssh -F sshconfig -i .identity "ec2-user@${INSTANCE_ID}" "echo -e \"${netrc_default}\" > ./test/.netrc"
 # It can take a couple minutes for docker to be available on the instance
-retry 10 ssh -F sshconfig -i .identity "ec2-user@${INSTANCE_ID}" "cd test && make image"
+retry 10 ssh -F sshconfig -i .identity "ec2-user@${INSTANCE_ID}" "cd test && make -j images"
 set +e
-ssh -F sshconfig -i .identity "ec2-user@${INSTANCE_ID}" "cd test && make run HARMONY_HOST_URL=${HARMONY_HOST_URL}"
+ssh -v -F sshconfig -i .identity "ec2-user@${INSTANCE_ID}" "cd test && export HARMONY_HOST_URL=${HARMONY_HOST_URL} && ./run_notebooks.sh"
 exit_code=$?
 set -e
 # copy the output to here

diff --git a/script/test.sh b/script/test.sh
@@ -12,6 +12,36 @@ function get_elb {
   echo $(aws elbv2 describe-load-balancers | jq --arg host "harmony-$HARMONY_ENVIRONMENT-frontend" '.LoadBalancers[] | select(.LoadBalancerName == $host) | .DNSName' | tr -d '"')
 }
 
+cd ..
+
+deployenv='.deployenv'
+if [ -e $deployenv ]; then
+  rm $deployenv
+fi
+
+if [ -e .env ]; then
+  echo "Using .env file"
+  set -o allexport
+  source .env
+  set +o allexport
+  cp .env $deployenv
+else
+  echo "AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}" >> $deployenv
+  echo "AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}" >> $deployenv
+  echo "REGRESSION_TEST_OUTPUT_BUCKET=${REGRESSION_TEST_OUTPUT_BUCKET}" >> $deployenv
+fi
+
+export AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-us-west-2}"
+echo "AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}" >> $deployenv
+
+# create the test environment
+cd ./terraform
+terraform init
+terraform apply -auto-approve -var "environment_name=${HARMONY_ENVIRONMENT}"
+instance_id=$(terraform output -json harmony_regression_test_instance_id | jq -r .id)
+
+echo "instance_id = ${instance_id}"
+
 case $HARMONY_ENVIRONMENT in
 uat)
   harmony_host_url="https://harmony.uat.earthdata.nasa.gov"
@@ -28,13 +58,7 @@ sit|sandbox)
   ;;
 esac
 
-output_bucket="${REGRESSION_TEST_OUTPUT_BUCKET}"
-
-# create the test environment
-cd ../terraform
-terraform init
-terraform apply -auto-approve -var "environment_name=${HARMONY_ENVIRONMENT}"
-instance_id=$(terraform output -json harmony_regression_test_instance_id | jq -r .id)
+echo "harmony host url: ${harmony_host_url}"
 
 cd ..
 
@@ -48,32 +72,15 @@ else
 fi
 chmod 0600 $identity
 
-AWS_DEFAULT_REGION="${AWS_DEFAULT_REGION:-us-west-2}"
-
-deployenv='.deployenv'
-if [ -e $deployenv ]; then
-  rm $deployenv
-fi
-
-if [ -e .env ]; then
-  set -o allexport
-  source .env
-  set +o allexport
-  cp .env $deployenv
-else
-  echo "AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}" >> $deployenv
-  echo "AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}" >> $deployenv
-fi
-
 echo "INSTANCE_ID=${instance_id}" >> $deployenv
 echo "HARMONY_HOST_URL=${harmony_host_url}" >> $deployenv
-echo "AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION}" >> $deployenv
-echo "REGRESSION_TEST_OUTPUT_BUCKET=${output_bucket}" >> $deployenv
 
 ./script/build-image.sh
 
 docker run --rm \
   -v $(pwd):/tmp \
+  -e EDL_USER \
+  -e EDL_PASSWORD \
   harmony/regression-tests \
   './script/deploy-from-docker.sh'
 

diff --git a/sshconfig b/sshconfig
@@ -4,3 +4,4 @@ Host i-*
     StrictHostKeyChecking no
     UserKnownHostsFile /dev/null
     LogLevel ERROR
+    ServerAliveInterval 60
diff --git a/terraform/inputs.tf b/terraform/inputs.tf
@@ -4,12 +4,12 @@ variable "aws_region" {
 }
 
 variable "instance_type" {
-  description = "EC2 instance type for the harmony application"
-  default     = "t2.medium"
+  description = "EC2 instance type for the regression test runner"
+  default     = "t2.xlarge"
 }
 
 variable "key_name" {
-  description = "Key pair name to use for the harmony EC2 instance."
+  description = "Key pair name to use for the harmony regression test instance."
   default     = "bamboo"
 }
 

diff --git a/terraform/main.tf b/terraform/main.tf
@@ -45,6 +45,10 @@ resource "aws_instance" "harmony_regression_test" {
 
   user_data = file("${path.module}/harmony-user-data.tmpl")
 
+  root_block_device {
+    volume_size = 256
+  }
+
   vpc_security_group_ids = [aws_security_group.harmony_regression_test.id]
   tags = {
     Name = "harmony-regression-test-${var.environment_name}"

diff --git a/test/Dockerfile b/test/Dockerfile
@@ -1,16 +1,25 @@
-FROM python:3.8.7-buster
+FROM continuumio/miniconda3:latest
 
-WORKDIR /opt/harmony
+ARG sub_dir
+ARG notebook
+# ARG values are build-time only; mirror them into ENV so the ENTRYPOINT can read them at run time.
+ENV env_sub_dir=$sub_dir
+ENV env_notebook=$notebook
 
-RUN pip install poetry
-RUN mkdir -p ./output
+WORKDIR /root
 
-COPY pyproject.toml .
-COPY poetry.lock .
-RUN poetry install
+RUN conda config --add channels conda-forge
+RUN pip install conda-lock
+RUN conda install conda-lock
 
-COPY notebooks ./notebooks
-COPY harmony ./harmony
-COPY run_notebooks.sh .
+COPY .netrc .netrc
+RUN mkdir ./${sub_dir}
+COPY ${sub_dir}/conda-linux-64.lock ./${sub_dir}
+RUN ls ${sub_dir}
 
-ENTRYPOINT ./run_notebooks.sh -p harmony_host_url $harmony_host_url
+RUN conda create --name papermill --file ./${sub_dir}/conda-linux-64.lock
+
+# Run the suite's notebook with papermill and write the executed copy to /root/output/<sub_dir>/Results.ipynb.
+# Commands are joined with `;`, so papermill still runs (and supplies the exit status) even if an
+# earlier step fails; the PATH export is what puts the `papermill` env's binaries first on PATH.
+ENTRYPOINT export PATH=/opt/conda/envs/papermill/bin:$PATH; mkdir -p /root/output/${env_sub_dir}; conda activate papermill; papermill --cwd ${env_sub_dir} ${env_sub_dir}/${env_notebook} /root/output/${env_sub_dir}/Results.ipynb -p harmony_host_url $harmony_host_url
diff --git a/test/Makefile b/test/Makefile
@@ -1,8 +1,17 @@
-.PHONY: run
+# These targets name Docker images, not files on disk, so declare them phony:
+# a stray file or directory with the same name must never make them look up to date.
+.PHONY: images harmony-image harmony-regression-image
+
+# Build the image for the `harmony` test suite (notebook Harmony.ipynb).
+harmony-image: Dockerfile harmony/environment.yaml
+	docker build -t harmony/regression-tests-harmony:latest -f ./Dockerfile --build-arg notebook=Harmony.ipynb --build-arg sub_dir=harmony .
 
-image: pyproject.toml poetry.lock Dockerfile
-	docker build -t harmony/regression-tests:latest .
+# asf-gdal-image: Dockerfile gdal_subsetter/environment.yaml
+# 	docker build -t harmony/regression-tests-asf-gdal:latest -f ./Dockerfile --build-arg notebook=GDAL_Subsetter_Regression.ipynb --build-arg sub_dir=gdal_subsetter .
 
-run:
-	docker run -v ${PWD}/output:/opt/harmony/output --env harmony_host_url="${HARMONY_HOST_URL}" harmony/regression-tests:latest
+# Build the image for the `harmony-regression` test suite (notebook HarmonyRegression.ipynb).
+harmony-regression-image: Dockerfile harmony-regression/environment.yaml
+	docker build -t harmony/regression-tests-harmony-regression:latest -f ./Dockerfile --build-arg notebook=HarmonyRegression.ipynb --build-arg sub_dir=harmony-regression .
 
+# Build every test-suite image; new suites add their `<name>-image` target here.
+images: harmony-image harmony-regression-image
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,3 +6,5 @@ output/* @@
     .env
     .deployenv
     .identity
+    .netrc