-
Notifications
You must be signed in to change notification settings - Fork 44
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
RHOAIENG-10449 - Add PR check for additional-demos notebooks
- Loading branch information
1 parent
7c04444
commit a017309
Showing
1 changed file
with
360 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,360 @@ | ||
# Workflow that exercises the additional demo notebooks. It is triggered when
# a label is added to a pull request; the jobs themselves filter on the label
# name.
name: Additional demo notebooks tests

on:
  pull_request:
    types:
      - labeled

# At most one active run per PR head branch and workflow: a newer run cancels
# the one still in progress.
concurrency:
  group: ${{ github.head_ref }}-${{ github.workflow }}
  cancel-in-progress: true

env:
  # Operator image passed as IMG to `make deploy` in the deploy steps.
  CODEFLARE_OPERATOR_IMG: 'quay.io/project-codeflare/codeflare-operator:dev'
|
||
jobs: | ||
# Job: run demo-notebooks/additional-demos/hf_interactive.ipynb with papermill
# against a KinD cluster that has the CodeFlare stack deployed, then collect
# operator and pod logs as an artifact.
verify-hf_interactive:
  # Run only when the label that triggered the event is 'test-additional-notebooks'.
  if: ${{ github.event.label.name == 'test-additional-notebooks' }}
  runs-on: ubuntu-20.04-4core

  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        submodules: recursive

    # Provides the local actions used below (./common/github-actions/...).
    - name: Checkout common repo code
      uses: actions/checkout@v4
      with:
        repository: 'project-codeflare/codeflare-common'
        ref: 'main'
        path: 'common'

    - name: Checkout CodeFlare operator repository
      uses: actions/checkout@v4
      with:
        repository: project-codeflare/codeflare-operator
        path: codeflare-operator

    # Go version and module cache key both come from the operator checkout.
    - name: Set Go
      uses: actions/setup-go@v5
      with:
        go-version-file: './codeflare-operator/go.mod'
        cache-dependency-path: './codeflare-operator/go.sum'

    - name: Set up gotestfmt
      uses: gotesttools/gotestfmt-action@v2
      with:
        token: ${{ secrets.GITHUB_TOKEN }}

    - name: Set up specific Python version
      uses: actions/setup-python@v5
      with:
        python-version: '3.9'
        cache: 'pip'  # caching pip dependencies

    - name: Setup and start KinD cluster
      uses: ./common/github-actions/kind

    # The id lets the log-collection steps check that the deploy succeeded.
    - name: Deploy CodeFlare stack
      id: deploy
      run: |
        cd codeflare-operator
        echo Setting up CodeFlare stack
        make setup-e2e
        echo Deploying CodeFlare operator
        make deploy -e IMG="${CODEFLARE_OPERATOR_IMG}" -e ENV="e2e"
        kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager
        cd ..

    - name: Setup Additional demo notebooks execution
      run: |
        echo "Installing papermill and dependencies..."
        pip install poetry papermill ipython ipykernel
        # Disable virtualenv due to problems using packaged in virtualenv in papermill
        poetry config virtualenvs.create false
        echo "Installing SDK..."
        poetry install --with test,docs

    - name: Run hf_interactive.ipynb
      run: |
        set -euo pipefail
        # Remove login/logout cells, as KinD doesn't support authentication using token
        jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' hf_interactive.ipynb > hf_interactive.ipynb.tmp && mv hf_interactive.ipynb.tmp hf_interactive.ipynb
        jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' hf_interactive.ipynb > hf_interactive.ipynb.tmp && mv hf_interactive.ipynb.tmp hf_interactive.ipynb
        # Replace async logs with waiting for job to finish, async logs don't work properly in papermill
        JOB_WAIT=$(jq -r '.' "${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json")
        jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' hf_interactive.ipynb > hf_interactive.ipynb.tmp && mv hf_interactive.ipynb.tmp hf_interactive.ipynb
        # Set explicit namespace as SDK need it (currently) to resolve local queues
        sed -i "s/worker_cpu_requests=8,/worker_cpu_requests=1, namespace='default',/" hf_interactive.ipynb
        # Change cluster parameters (need to decrease)
        # '|' is used as the sed delimiter because the pattern contains '/';
        # with '/' as delimiter sed rejects the expression and the step fails.
        sed -i "s|{'nvidia.com/gpu':1}|{'nvidia.com/gpu':0}|g" hf_interactive.ipynb
        sed -i "s/worker_cpu_limits=8,/worker_cpu_limits=1,/" hf_interactive.ipynb
        sed -i "s/worker_memory_requests=16,/worker_memory_requests=4,/" hf_interactive.ipynb
        sed -i "s/worker_memory_limits=8,/worker_memory_limits=4,/" hf_interactive.ipynb
        # Run notebook
        poetry run papermill hf_interactive.ipynb hf_interactive_out.ipynb --log-output --execution-timeout 1200
      working-directory: demo-notebooks/additional-demos

    # The steps below run even when the notebook step failed, as long as the
    # deploy step succeeded.
    # NOTE(review): TEMP_DIR is not defined anywhere in this workflow; it is
    # presumably exported by the KinD setup action - confirm.
    - name: Print CodeFlare operator logs
      if: always() && steps.deploy.outcome == 'success'
      run: |
        echo "Printing CodeFlare operator logs"
        kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${TEMP_DIR}/codeflare-operator.log

    - name: Print Kueue operator logs
      if: always() && steps.deploy.outcome == 'success'
      run: |
        echo "Printing Kueue operator logs"
        KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
        kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee ${TEMP_DIR}/kueue.log

    - name: Print KubeRay operator logs
      if: always() && steps.deploy.outcome == 'success'
      run: |
        echo "Printing KubeRay operator logs"
        kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${TEMP_DIR}/kuberay.log

    - name: Export all KinD pod logs
      uses: ./common/github-actions/kind-export-logs
      if: always() && steps.deploy.outcome == 'success'
      with:
        output-directory: ${TEMP_DIR}

    - name: Upload logs
      uses: actions/upload-artifact@v4
      if: always() && steps.deploy.outcome == 'success'
      with:
        name: logs-verify-hf_interactive
        retention-days: 10
        path: |
          ${{ env.TEMP_DIR }}/**/*.log
# Job: run demo-notebooks/additional-demos/local_interactive.ipynb with
# papermill against a KinD cluster that has the CodeFlare stack deployed, then
# collect operator and pod logs as an artifact.
verify-local_interactive:
  # Run only when the label that triggered the event is 'test-additional-notebooks'.
  if: ${{ github.event.label.name == 'test-additional-notebooks' }}
  runs-on: ubuntu-20.04-4core

  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        submodules: recursive

    # Provides the local actions used below (./common/github-actions/...).
    - name: Checkout common repo code
      uses: actions/checkout@v4
      with:
        repository: 'project-codeflare/codeflare-common'
        ref: 'main'
        path: 'common'

    - name: Checkout CodeFlare operator repository
      uses: actions/checkout@v4
      with:
        repository: project-codeflare/codeflare-operator
        path: codeflare-operator

    # Go version and module cache key both come from the operator checkout.
    - name: Set Go
      uses: actions/setup-go@v5
      with:
        go-version-file: './codeflare-operator/go.mod'
        cache-dependency-path: './codeflare-operator/go.sum'

    - name: Set up gotestfmt
      uses: gotesttools/gotestfmt-action@v2
      with:
        token: ${{ secrets.GITHUB_TOKEN }}

    - name: Set up specific Python version
      uses: actions/setup-python@v5
      with:
        python-version: '3.9'
        cache: 'pip'  # caching pip dependencies

    - name: Setup and start KinD cluster
      uses: ./common/github-actions/kind

    # The id lets the log-collection steps check that the deploy succeeded.
    - name: Deploy CodeFlare stack
      id: deploy
      run: |
        cd codeflare-operator
        echo Setting up CodeFlare stack
        make setup-e2e
        echo Deploying CodeFlare operator
        make deploy -e IMG="${CODEFLARE_OPERATOR_IMG}" -e ENV="e2e"
        kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager
        cd ..

    - name: Setup Additional demo notebooks execution
      run: |
        echo "Installing papermill and dependencies..."
        pip install poetry papermill ipython ipykernel
        # Disable virtualenv due to problems using packaged in virtualenv in papermill
        poetry config virtualenvs.create false
        echo "Installing SDK..."
        poetry install --with test,docs

    - name: Run local_interactive.ipynb
      run: |
        set -euo pipefail
        # Remove login/logout cells, as KinD doesn't support authentication using token
        jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
        jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
        # Replace async logs with waiting for job to finish, async logs don't work properly in papermill
        JOB_WAIT=$(jq -r '.' "${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json")
        jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' local_interactive.ipynb > local_interactive.ipynb.tmp && mv local_interactive.ipynb.tmp local_interactive.ipynb
        # Set explicit namespace as SDK need it (currently) to resolve local queues
        sed -i "s/worker_cpu_requests=1,/worker_cpu_requests=1, namespace='default',/" local_interactive.ipynb
        # Run notebook; the executed copy is written to an output file named after this notebook
        poetry run papermill local_interactive.ipynb local_interactive_out.ipynb --log-output --execution-timeout 1200
      working-directory: demo-notebooks/additional-demos

    # The steps below run even when the notebook step failed, as long as the
    # deploy step succeeded.
    # NOTE(review): TEMP_DIR is not defined anywhere in this workflow; it is
    # presumably exported by the KinD setup action - confirm.
    - name: Print CodeFlare operator logs
      if: always() && steps.deploy.outcome == 'success'
      run: |
        echo "Printing CodeFlare operator logs"
        kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${TEMP_DIR}/codeflare-operator.log

    - name: Print Kueue operator logs
      if: always() && steps.deploy.outcome == 'success'
      run: |
        echo "Printing Kueue operator logs"
        KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
        kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee ${TEMP_DIR}/kueue.log

    - name: Print KubeRay operator logs
      if: always() && steps.deploy.outcome == 'success'
      run: |
        echo "Printing KubeRay operator logs"
        kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${TEMP_DIR}/kuberay.log

    - name: Export all KinD pod logs
      uses: ./common/github-actions/kind-export-logs
      if: always() && steps.deploy.outcome == 'success'
      with:
        output-directory: ${TEMP_DIR}

    - name: Upload logs
      uses: actions/upload-artifact@v4
      if: always() && steps.deploy.outcome == 'success'
      with:
        name: logs-local_interactive
        retention-days: 10
        path: |
          ${{ env.TEMP_DIR }}/**/*.log
# Job: run demo-notebooks/additional-demos/ray_job_client.ipynb with papermill
# against a KinD cluster that has the CodeFlare stack deployed, then collect
# operator and pod logs as an artifact.
verify-ray_job_client:
  # Run only when the label that triggered the event is 'test-additional-notebooks'.
  if: ${{ github.event.label.name == 'test-additional-notebooks' }}
  runs-on: ubuntu-20.04-4core

  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        submodules: recursive

    # Provides the local actions used below (./common/github-actions/...).
    - name: Checkout common repo code
      uses: actions/checkout@v4
      with:
        repository: 'project-codeflare/codeflare-common'
        ref: 'main'
        path: 'common'

    - name: Checkout CodeFlare operator repository
      uses: actions/checkout@v4
      with:
        repository: project-codeflare/codeflare-operator
        path: codeflare-operator

    # Go version and module cache key both come from the operator checkout.
    - name: Set Go
      uses: actions/setup-go@v5
      with:
        go-version-file: './codeflare-operator/go.mod'
        cache-dependency-path: './codeflare-operator/go.sum'

    - name: Set up gotestfmt
      uses: gotesttools/gotestfmt-action@v2
      with:
        token: ${{ secrets.GITHUB_TOKEN }}

    - name: Set up specific Python version
      uses: actions/setup-python@v5
      with:
        python-version: '3.9'
        cache: 'pip'  # caching pip dependencies

    - name: Setup and start KinD cluster
      uses: ./common/github-actions/kind

    # The id lets the log-collection steps check that the deploy succeeded.
    - name: Deploy CodeFlare stack
      id: deploy
      run: |
        cd codeflare-operator
        echo Setting up CodeFlare stack
        make setup-e2e
        echo Deploying CodeFlare operator
        make deploy -e IMG="${CODEFLARE_OPERATOR_IMG}" -e ENV="e2e"
        kubectl wait --timeout=120s --for=condition=Available=true deployment -n openshift-operators codeflare-operator-manager
        cd ..

    - name: Setup Additional demo notebooks execution
      run: |
        echo "Installing papermill and dependencies..."
        pip install poetry papermill ipython ipykernel
        # Disable virtualenv due to problems using packaged in virtualenv in papermill
        poetry config virtualenvs.create false
        echo "Installing SDK..."
        poetry install --with test,docs

    - name: Run ray_job_client.ipynb
      run: |
        set -euo pipefail
        # Remove login/logout cells, as KinD doesn't support authentication using token
        jq -r 'del(.cells[] | select(.source[] | contains("Create authentication object for user permissions")))' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
        jq -r 'del(.cells[] | select(.source[] | contains("auth.logout()")))' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
        # Replace async logs with waiting for job to finish, async logs don't work properly in papermill
        JOB_WAIT=$(jq -r '.' "${GITHUB_WORKSPACE}/.github/resources/wait_for_job_cell.json")
        jq --argjson job_wait "$JOB_WAIT" -r '(.cells[] | select(.source[] | contains("async for lines in client.tail_job_logs"))) |= $job_wait' ray_job_client.ipynb > ray_job_client.ipynb.tmp && mv ray_job_client.ipynb.tmp ray_job_client.ipynb
        # Set explicit namespace as SDK need it (currently) to resolve local queues
        sed -i "s/worker_cpu_requests=1,/worker_cpu_requests=1, namespace='default',/" ray_job_client.ipynb
        # Run notebook; the executed copy is written to an output file named after this notebook
        poetry run papermill ray_job_client.ipynb ray_job_client_out.ipynb --log-output --execution-timeout 1200
      working-directory: demo-notebooks/additional-demos

    # The steps below run even when the notebook step failed, as long as the
    # deploy step succeeded.
    # NOTE(review): TEMP_DIR is not defined anywhere in this workflow; it is
    # presumably exported by the KinD setup action - confirm.
    - name: Print CodeFlare operator logs
      if: always() && steps.deploy.outcome == 'success'
      run: |
        echo "Printing CodeFlare operator logs"
        kubectl logs -n openshift-operators --tail -1 -l app.kubernetes.io/name=codeflare-operator | tee ${TEMP_DIR}/codeflare-operator.log

    - name: Print Kueue operator logs
      if: always() && steps.deploy.outcome == 'success'
      run: |
        echo "Printing Kueue operator logs"
        KUEUE_CONTROLLER_POD=$(kubectl get pods -n kueue-system | grep kueue-controller | awk '{print $1}')
        kubectl logs -n kueue-system --tail -1 ${KUEUE_CONTROLLER_POD} | tee ${TEMP_DIR}/kueue.log

    - name: Print KubeRay operator logs
      if: always() && steps.deploy.outcome == 'success'
      run: |
        echo "Printing KubeRay operator logs"
        kubectl logs -n ray-system --tail -1 -l app.kubernetes.io/name=kuberay | tee ${TEMP_DIR}/kuberay.log

    - name: Export all KinD pod logs
      uses: ./common/github-actions/kind-export-logs
      if: always() && steps.deploy.outcome == 'success'
      with:
        output-directory: ${TEMP_DIR}

    - name: Upload logs
      uses: actions/upload-artifact@v4
      if: always() && steps.deploy.outcome == 'success'
      with:
        name: logs-ray_job_client
        retention-days: 10
        path: |
          ${{ env.TEMP_DIR }}/**/*.log