tests: improve performance of tests by caching repos

SwissDataScienceCenter · May 25, 2023 · 8adb4f8 · 8adb4f8
1 parent e02e5bf
commit 8adb4f8
Show file tree

Hide file tree

Showing 29 changed files with 360 additions and 262 deletions.
diff --git a/.github/workflows/test_deploy.yml b/.github/workflows/test_deploy.yml
@@ -14,7 +14,8 @@ on:
     branches:
       - "**"
       - "!master"
-
+env:
+  RENKU_TEST_RECREATE_CACHE: "${{ (endsWith(github.ref, 'master') || endsWith(github.ref, 'develop') || startsWith(github.ref, 'refs/tags/') || startsWith(github.ref, 'refs/heads/release/' ) ) && '1' || '0' }}"
 jobs:
   set-matrix:
     runs-on: ubuntu-latest

diff --git a/tests/cli/fixtures/cli_workflow.py b/tests/cli/fixtures/cli_workflow.py
@@ -19,19 +19,22 @@
 
 
 @pytest.fixture
-def workflow_graph(run_shell, project):
+def workflow_graph(run_shell, project, cache_test_project):
     """Setup a project with a workflow graph."""
+    cache_test_project.set_name("workflow_graph_fixture")
+    if not cache_test_project.setup():
 
-    def _run_workflow(name, command, extra_args=""):
-        output = run_shell(f"renku run --name {name} {extra_args} -- {command}")
-        # Assert not allocated stderr.
-        assert output[1] is None
+        def _run_workflow(name, command, extra_args=""):
+            output = run_shell(f"renku run --name {name} {extra_args} -- {command}")
+            # Assert not allocated stderr.
+            assert output[1] is None
 
-    _run_workflow("r1", "echo 'test' > A")
-    _run_workflow("r2", "tee B C < A")
-    _run_workflow("r3", "cp A Z")
-    _run_workflow("r4", "cp B X")
-    _run_workflow("r5", "cat C Z > Y")
-    _run_workflow("r6", "bash -c 'cat X Y | tee R S'", extra_args="--input X --input Y --output R --output S")
-    _run_workflow("r7", "echo 'other' > H")
-    _run_workflow("r8", "tee I J < H")
+        _run_workflow("r1", "echo 'test' > A")
+        _run_workflow("r2", "tee B C < A")
+        _run_workflow("r3", "cp A Z")
+        _run_workflow("r4", "cp B X")
+        _run_workflow("r5", "cat C Z > Y")
+        _run_workflow("r6", "bash -c 'cat X Y | tee R S'", extra_args="--input X --input Y --output R --output S")
+        _run_workflow("r7", "echo 'other' > H")
+        _run_workflow("r8", "tee I J < H")
+        cache_test_project.save()
diff --git a/tests/cli/test_graph.py b/tests/cli/test_graph.py
@@ -27,14 +27,16 @@
 
 
 @pytest.mark.parametrize("revision", ["", "HEAD", "HEAD^", "HEAD^..HEAD"])
-def test_graph_export_validation(runner, project, directory_tree, run, revision):
+def test_graph_export_validation(runner, project, directory_tree, run, revision, cache_test_project):
     """Test graph validation when exporting."""
-    assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "-c", "my-data", str(directory_tree)]).exit_code
+    if not cache_test_project.setup():
+        assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "-c", "my-data", str(directory_tree)]).exit_code
 
-    file1 = project.path / DATA_DIR / "my-data" / directory_tree.name / "file1"
-    file2 = project.path / DATA_DIR / "my-data" / directory_tree.name / "dir1" / "file2"
-    assert 0 == run(["run", "head", str(file1)], stdout="out1")
-    assert 0 == run(["run", "tail", str(file2)], stdout="out2")
+        file1 = project.path / DATA_DIR / "my-data" / directory_tree.name / "file1"
+        file2 = project.path / DATA_DIR / "my-data" / directory_tree.name / "dir1" / "file2"
+        assert 0 == run(["run", "head", str(file1)], stdout="out1")
+        assert 0 == run(["run", "tail", str(file2)], stdout="out2")
+        cache_test_project.save()
 
     result = runner.invoke(cli, ["graph", "export", "--format", "json-ld", "--strict", "--revision", revision])
 
@@ -57,12 +59,14 @@ def test_graph_export_validation(runner, project, directory_tree, run, revision)
 
 @pytest.mark.serial
 @pytest.mark.shelled
-def test_graph_export_strict_run(runner, project, run_shell):
+def test_graph_export_strict_run(runner, project, run_shell, cache_test_project):
     """Test graph export output of run command."""
-    # Run a shell command with pipe.
-    assert run_shell('renku run --name run1 echo "my input string" > my_output_file')[1] is None
-    assert run_shell("renku run --name run2 cp my_output_file my_output_file2")[1] is None
-    assert run_shell("renku workflow compose my-composite-plan run1 run2")[1] is None
+    if not cache_test_project.setup():
+        # Run a shell command with pipe.
+        assert run_shell('renku run --name run1 echo "my input string" > my_output_file')[1] is None
+        assert run_shell("renku run --name run2 cp my_output_file my_output_file2")[1] is None
+        assert run_shell("renku workflow compose my-composite-plan run1 run2")[1] is None
+        cache_test_project.save()
 
     # Assert created output file.
     result = runner.invoke(cli, ["graph", "export", "--full", "--strict", "--format=json-ld"])
@@ -80,21 +84,23 @@ def test_graph_export_strict_run(runner, project, run_shell):
     assert 0 == result.exit_code, format_result_exception(result)
 
 
-def test_graph_export_strict_dataset(tmpdir, runner, project, subdirectory):
+def test_graph_export_strict_dataset(tmpdir, runner, project, subdirectory, cache_test_project):
     """Test output of graph export for dataset add."""
-    result = runner.invoke(cli, ["dataset", "create", "my-dataset"])
-    assert 0 == result.exit_code, format_result_exception(result)
-    paths = []
-    test_paths = []
-    for i in range(3):
-        new_file = tmpdir.join(f"file_{i}")
-        new_file.write(str(i))
-        paths.append(str(new_file))
-        test_paths.append(os.path.relpath(str(new_file), str(project.path)))
-
-    # add data
-    result = runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset"] + paths)
-    assert 0 == result.exit_code, format_result_exception(result)
+    if not cache_test_project.setup():
+        result = runner.invoke(cli, ["dataset", "create", "my-dataset"])
+        assert 0 == result.exit_code, format_result_exception(result)
+        paths = []
+        test_paths = []
+        for i in range(3):
+            new_file = tmpdir.join(f"file_{i}")
+            new_file.write(str(i))
+            paths.append(str(new_file))
+            test_paths.append(os.path.relpath(str(new_file), str(project.path)))
+
+        # add data
+        result = runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset"] + paths)
+        assert 0 == result.exit_code, format_result_exception(result)
+        cache_test_project.save()
 
     result = runner.invoke(cli, ["graph", "export", "--strict", "--format=json-ld", "--revision", "HEAD"])
     assert 0 == result.exit_code, format_result_exception(result)

diff --git a/tests/cli/test_merge.py b/tests/cli/test_merge.py
@@ -24,96 +24,98 @@
 from tests.utils import format_result_exception
 
 
-def test_mergetool(runner, project, directory_tree, run_shell, with_injection):
+def test_mergetool(runner, project, directory_tree, run_shell, with_injection, cache_test_project):
     """Test that merge tool can merge renku metadata."""
-    result = runner.invoke(cli, ["mergetool", "install"])
+    if not cache_test_project.setup():
+        result = runner.invoke(cli, ["mergetool", "install"])
 
-    assert 0 == result.exit_code, format_result_exception(result)
+        assert 0 == result.exit_code, format_result_exception(result)
 
-    # create a common dataset
-    result = runner.invoke(
-        cli, ["dataset", "add", "--copy", "--create", "shared-dataset", str(directory_tree)], catch_exceptions=False
-    )
-    assert 0 == result.exit_code, format_result_exception(result)
+        # create a common dataset
+        result = runner.invoke(
+            cli, ["dataset", "add", "--copy", "--create", "shared-dataset", str(directory_tree)], catch_exceptions=False
+        )
+        assert 0 == result.exit_code, format_result_exception(result)
 
-    # Create a common workflow
-    output = run_shell('renku run --name "shared-workflow" echo "a unique string" > my_output_file')
+        # Create a common workflow
+        output = run_shell('renku run --name "shared-workflow" echo "a unique string" > my_output_file')
 
-    assert b"" == output[0]
-    assert output[1] is None
+        assert b"" == output[0]
+        assert output[1] is None
 
-    # switch to a new branch
-    output = run_shell("git checkout -b remote-branch")
+        # switch to a new branch
+        output = run_shell("git checkout -b remote-branch")
 
-    assert b"Switched to a new branch 'remote-branch'\n" == output[0]
-    assert output[1] is None
+        assert b"Switched to a new branch 'remote-branch'\n" == output[0]
+        assert output[1] is None
 
-    # edit the dataset
-    result = runner.invoke(cli, ["dataset", "edit", "-d", "remote description", "shared-dataset"])
-    assert 0 == result.exit_code, format_result_exception(result)
+        # edit the dataset
+        result = runner.invoke(cli, ["dataset", "edit", "-d", "remote description", "shared-dataset"])
+        assert 0 == result.exit_code, format_result_exception(result)
 
-    result = runner.invoke(
-        cli, ["dataset", "add", "--copy", "--create", "remote-dataset", str(directory_tree)], catch_exceptions=False
-    )
-    assert 0 == result.exit_code, format_result_exception(result)
+        result = runner.invoke(
+            cli, ["dataset", "add", "--copy", "--create", "remote-dataset", str(directory_tree)], catch_exceptions=False
+        )
+        assert 0 == result.exit_code, format_result_exception(result)
 
-    # Create a new workflow
-    output = run_shell('renku run --name "remote-workflow" echo "a unique string" > remote_output_file')
+        # Create a new workflow
+        output = run_shell('renku run --name "remote-workflow" echo "a unique string" > remote_output_file')
 
-    assert b"" == output[0]
-    assert output[1] is None
+        assert b"" == output[0]
+        assert output[1] is None
 
-    # Create a downstream workflow
-    output = run_shell('renku run --name "remote-downstream-workflow" cp my_output_file my_remote_downstream')
+        # Create a downstream workflow
+        output = run_shell('renku run --name "remote-downstream-workflow" cp my_output_file my_remote_downstream')
 
-    assert b"" == output[0]
-    assert output[1] is None
+        assert b"" == output[0]
+        assert output[1] is None
 
-    # Create another downstream workflow
-    output = run_shell('renku run --name "remote-downstream-workflow2" cp remote_output_file my_remote_downstream2')
+        # Create another downstream workflow
+        output = run_shell('renku run --name "remote-downstream-workflow2" cp remote_output_file my_remote_downstream2')
 
-    assert b"" == output[0]
-    assert output[1] is None
+        assert b"" == output[0]
+        assert output[1] is None
 
-    # Edit the project metadata
-    result = runner.invoke(cli, ["project", "edit", "-k", "remote"])
+        # Edit the project metadata
+        result = runner.invoke(cli, ["project", "edit", "-k", "remote"])
 
-    assert 0 == result.exit_code, format_result_exception(result)
+        assert 0 == result.exit_code, format_result_exception(result)
 
-    # Switch back to master
-    output = run_shell("git checkout master")
+        # Switch back to master
+        output = run_shell("git checkout master")
 
-    assert b"Switched to branch 'master'\n" == output[0]
-    assert output[1] is None
+        assert b"Switched to branch 'master'\n" == output[0]
+        assert output[1] is None
 
-    # Add a new dataset
-    result = runner.invoke(
-        cli, ["dataset", "add", "--copy", "--create", "local-dataset", str(directory_tree)], catch_exceptions=False
-    )
-    assert 0 == result.exit_code, format_result_exception(result)
+        # Add a new dataset
+        result = runner.invoke(
+            cli, ["dataset", "add", "--copy", "--create", "local-dataset", str(directory_tree)], catch_exceptions=False
+        )
+        assert 0 == result.exit_code, format_result_exception(result)
 
-    # Create a local workflow
-    output = run_shell('renku run --name "local-workflow" echo "a unique string" > local_output_file')
+        # Create a local workflow
+        output = run_shell('renku run --name "local-workflow" echo "a unique string" > local_output_file')
 
-    assert b"" == output[0]
-    assert output[1] is None
+        assert b"" == output[0]
+        assert output[1] is None
 
-    # Create a local downstream workflow
-    output = run_shell('renku run --name "local-downstream-workflow" cp my_output_file my_local_downstream')
+        # Create a local downstream workflow
+        output = run_shell('renku run --name "local-downstream-workflow" cp my_output_file my_local_downstream')
 
-    assert b"" == output[0]
-    assert output[1] is None
+        assert b"" == output[0]
+        assert output[1] is None
 
-    # Create another local downstream workflow
-    output = run_shell('renku run --name "local-downstream-workflow2" cp local_output_file my_local_downstream2')
+        # Create another local downstream workflow
+        output = run_shell('renku run --name "local-downstream-workflow2" cp local_output_file my_local_downstream2')
 
-    assert b"" == output[0]
-    assert output[1] is None
+        assert b"" == output[0]
+        assert output[1] is None
 
-    # Edit the project in master as well
-    result = runner.invoke(cli, ["project", "edit", "-k", "local"])
+        # Edit the project in master as well
+        result = runner.invoke(cli, ["project", "edit", "-k", "local"])
 
-    assert 0 == result.exit_code, format_result_exception(result)
+        assert 0 == result.exit_code, format_result_exception(result)
+        cache_test_project.save()
 
     # Merge branches
     output = run_shell("git merge --no-edit remote-branch")
@@ -146,32 +148,34 @@ def test_mergetool(runner, project, directory_tree, run_shell, with_injection):
     assert "remote description" == shared_dataset.description
 
 
-def test_mergetool_workflow_conflict(runner, project, run_shell, with_injection):
+def test_mergetool_workflow_conflict(runner, project, run_shell, with_injection, cache_test_project):
     """Test that merge tool can merge conflicting workflows."""
-    result = runner.invoke(cli, ["mergetool", "install"])
+    if not cache_test_project.setup():
+        result = runner.invoke(cli, ["mergetool", "install"])
 
-    assert 0 == result.exit_code, format_result_exception(result)
+        assert 0 == result.exit_code, format_result_exception(result)
 
-    output = run_shell('renku run --name "shared-workflow" echo "a unique string" > my_output_file')
+        output = run_shell('renku run --name "shared-workflow" echo "a unique string" > my_output_file')
 
-    assert b"" == output[0]
-    assert output[1] is None
+        assert b"" == output[0]
+        assert output[1] is None
 
-    # Switch to a new branch and create some workflows
-    output = run_shell("git checkout -b remote-branch")
+        # Switch to a new branch and create some workflows
+        output = run_shell("git checkout -b remote-branch")
 
-    assert b"Switched to a new branch 'remote-branch'\n" == output[0]
-    assert output[1] is None
+        assert b"Switched to a new branch 'remote-branch'\n" == output[0]
+        assert output[1] is None
 
-    output = run_shell('renku run --name "remote-workflow" cp my_output_file out1')
+        output = run_shell('renku run --name "remote-workflow" cp my_output_file out1')
 
-    assert b"" == output[0]
-    assert output[1] is None
+        assert b"" == output[0]
+        assert output[1] is None
 
-    output = run_shell('renku run --name "common-name" cp my_output_file out2')
+        output = run_shell('renku run --name "common-name" cp my_output_file out2')
 
-    assert b"" == output[0]
-    assert output[1] is None
+        assert b"" == output[0]
+        assert output[1] is None
+        cache_test_project.save()
 
     with with_injection():
         plan_gateway = PlanGateway()
@@ -237,30 +241,33 @@ def test_mergetool_workflow_conflict(runner, project, run_shell, with_injection)
     assert len(plans) == 4
 
 
-def test_mergetool_workflow_complex_conflict(runner, project, run_shell, with_injection):
+def test_mergetool_workflow_complex_conflict(runner, project, run_shell, with_injection, cache_test_project):
     """Test that merge tool can merge complex conflicts in workflows."""
-    result = runner.invoke(cli, ["mergetool", "install"])
+    if not cache_test_project.setup():
+        result = runner.invoke(cli, ["mergetool", "install"])
 
-    assert 0 == result.exit_code, format_result_exception(result)
+        assert 0 == result.exit_code, format_result_exception(result)
 
-    output = run_shell('renku run --name "shared-workflow" echo "a unique string" > my_output_file')
+        output = run_shell('renku run --name "shared-workflow" echo "a unique string" > my_output_file')
 
-    assert b"" == output[0]
-    assert output[1] is None
+        assert b"" == output[0]
+        assert output[1] is None
 
-    # Switch to a new branch and create some workflows
-    output = run_shell("git checkout -b remote-branch")
+        # Switch to a new branch and create some workflows
+        output = run_shell("git checkout -b remote-branch")
 
-    assert b"Switched to a new branch 'remote-branch'\n" == output[0]
-    assert output[1] is None
+        assert b"Switched to a new branch 'remote-branch'\n" == output[0]
+        assert output[1] is None
 
-    output = run_shell('renku run --name "intermediate-workflow" cp my_output_file intermediate')
+        output = run_shell('renku run --name "intermediate-workflow" cp my_output_file intermediate')
 
-    assert b"" == output[0]
+        assert b"" == output[0]
 
-    output = run_shell('renku run --name "final-workflow" cp intermediate final')
+        output = run_shell('renku run --name "final-workflow" cp intermediate final')
 
-    assert b"" == output[0]
+        assert b"" == output[0]
+
+        cache_test_project.save()
 
     with with_injection():
         plan_gateway = PlanGateway()