add tests for prediction API (#19)

* add tests for prediction API
* update tests to use spec strings instead of dict payloads
* predict -> predict_single
* split up `parse_alloc_spec` version checks

Co-authored-by: Alec Scott <[email protected]>

* style

---------

Co-authored-by: Alec Scott <[email protected]>
spack · May 8, 2024 · 5729b22 · 5729b22
1 parent 0965e06
commit 5729b22
Show file tree

Hide file tree

Showing 3 changed files with 155 additions and 0 deletions.
diff --git a/gantry/tests/defs/prediction.py b/gantry/tests/defs/prediction.py
@@ -0,0 +1,36 @@
+# flake8: noqa
+# fmt: off
+
+from gantry.util.spec import parse_alloc_spec
+
+NORMAL_BUILD = parse_alloc_spec(
+    "[email protected] ~caffe2+cuda+cudnn~debug+distributed+fbgemm+gloo+kineto~magma~metal+mkldnn+mpi~nccl+nnpack+numa+numpy+onnx_ml+openmp+qnnpack~rocm+tensorpipe~test+valgrind+xnnpack build_system=python_pip cuda_arch=80%[email protected]"
+)
+
+# everything in NORMAL_BUILD["package"]["variants"] except removing build_system=python_pip
+# in order to test the expensive variants filter
+EXPENSIVE_VARIANT_BUILD = parse_alloc_spec(
+    "[email protected] ~caffe2+cuda+cudnn~debug+distributed+fbgemm+gloo+kineto~magma~metal+mkldnn+mpi~nccl+nnpack+numa+numpy+onnx_ml+openmp+qnnpack~rocm+tensorpipe~test+valgrind+xnnpack cuda_arch=80%[email protected]"
+)
+
+# no variants should match this, so we expect the default prediction
+BAD_VARIANT_BUILD = parse_alloc_spec(
+    "[email protected] +no~expensive~variants+match%[email protected]"
+)
+
+# calculated by running the baseline prediction algorithm on the sample data in gantry/tests/sql/insert_samples.sql
+NORMAL_PREDICTION = {
+    "variables": {
+        "KUBERNETES_CPU_REQUEST": "12",
+        "KUBERNETES_MEMORY_REQUEST": "9576M",
+    },
+}
+
+# fallback prediction: this is what gets returned when there are no samples
+# in the database that match what the client wants
+DEFAULT_PREDICTION = {
+    "variables": {
+        "KUBERNETES_CPU_REQUEST": "1",
+        "KUBERNETES_MEMORY_REQUEST": "2000M",
+    },
+}
diff --git a/gantry/tests/sql/insert_samples.sql b/gantry/tests/sql/insert_samples.sql
@@ -0,0 +1,6 @@
+INSERT INTO nodes VALUES(6789,'ec2c47a0-7e9b-cfa3-9ad4-ac227ade598d','ip-192-168-202-150.ec2.internal',32.0,131072000000.0,'amd64','linux','m5.8xlarge');
+INSERT INTO jobs VALUES(6781,'runner-2j2ndhxu-project-2-concurrent-0-nbogpypi1',6789,1708919572.983000041,1708924744.811000108,101502092,'success','develop','py-torch','2.2.1','{"caffe2": false, "cuda": true, "cudnn": true, "debug": false, "distributed": true, "fbgemm": true, "gloo": true, "kineto": true, "magma": false, "metal": false, "mkldnn": true, "mpi": true, "nccl": false, "nnpack": true, "numa": true, "numpy": true, "onnx_ml": true, "openmp": true, "qnnpack": true, "rocm": false, "tensorpipe": true, "test": false, "valgrind": true, "xnnpack": true, "build_system": "python_pip", "cuda_arch": "80"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',12,12.0,NULL,9.77948152336477605,11.98751586519425772,12.00060520666194109,0.3736576704015182604,3.811106184376615414,48000000000.0,64000000000.0,9652098890.24199867,7399608320.0,41186873344.0,85508096.0,8707419891.779100419);
+INSERT INTO jobs VALUES(6782,'runner-2j2ndhxu-project-2-concurrent-0-nbogpypi2',6789,1708919572.983000041,1708924744.811000108,101502093,'success','develop','py-torch','2.2.1','{"caffe2": false, "cuda": true, "cudnn": true, "debug": false, "distributed": true, "fbgemm": true, "gloo": true, "kineto": true, "magma": false, "metal": false, "mkldnn": true, "mpi": true, "nccl": false, "nnpack": true, "numa": true, "numpy": true, "onnx_ml": true, "openmp": true, "qnnpack": true, "rocm": false, "tensorpipe": true, "test": false, "valgrind": true, "xnnpack": true, "build_system": "python_pip", "cuda_arch": "80"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',12,12.0,NULL,10.77948152336477605,11.98751586519425772,12.00060520666194109,0.3736576704015182604,3.811106184376615414,48000000000.0,64000000000.0,9958098890.24199867,7399608320.0,41186873344.0,85508096.0,8707419891.779100419);
+INSERT INTO jobs VALUES(6783,'runner-2j2ndhxu-project-2-concurrent-0-nbogpypi3',6789,1708919572.983000041,1708924744.811000108,101502094,'success','develop','py-torch','2.2.1','{"caffe2": false, "cuda": true, "cudnn": true, "debug": false, "distributed": true, "fbgemm": true, "gloo": true, "kineto": true, "magma": false, "metal": false, "mkldnn": true, "mpi": true, "nccl": false, "nnpack": true, "numa": true, "numpy": true, "onnx_ml": true, "openmp": true, "qnnpack": true, "rocm": false, "tensorpipe": true, "test": false, "valgrind": true, "xnnpack": true, "build_system": "python_pip", "cuda_arch": "80"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',12,12.0,NULL,11.77948152336477605,11.98751586519425772,12.00060520666194109,0.3736576704015182604,3.811106184376615414,48000000000.0,64000000000.0,9158098890.24199867,7399608320.0,41186873344.0,85508096.0,8707419891.779100419);
+INSERT INTO jobs VALUES(6784,'runner-2j2ndhxu-project-2-concurrent-0-nbogpypi4',6789,1708919572.983000041,1708924744.811000108,101502095,'success','develop','py-torch','2.2.1','{"caffe2": false, "cuda": true, "cudnn": true, "debug": false, "distributed": true, "fbgemm": true, "gloo": true, "kineto": true, "magma": false, "metal": false, "mkldnn": true, "mpi": true, "nccl": false, "nnpack": true, "numa": true, "numpy": true, "onnx_ml": true, "openmp": true, "qnnpack": true, "rocm": false, "tensorpipe": true, "test": false, "valgrind": true, "xnnpack": true, "build_system": "python_pip", "cuda_arch": "80"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',12,12.0,NULL,12.77948152336477605,11.98751586519425772,12.00060520666194109,0.3736576704015182604,3.811106184376615414,48000000000.0,64000000000.0,9758098890.24199867,7399608320.0,41186873344.0,85508096.0,8707419891.779100419);
+INSERT INTO jobs VALUES(6785,'runner-2j2ndhxu-project-2-concurrent-0-nbogpypi5',6789,1708919572.983000041,1708924744.811000108,101502096,'success','develop','py-torch','2.2.1','{"caffe2": false, "cuda": true, "cudnn": true, "debug": false, "distributed": true, "fbgemm": true, "gloo": true, "kineto": true, "magma": false, "metal": false, "mkldnn": true, "mpi": true, "nccl": false, "nnpack": true, "numa": true, "numpy": true, "onnx_ml": true, "openmp": true, "qnnpack": true, "rocm": false, "tensorpipe": true, "test": false, "valgrind": true, "xnnpack": true, "build_system": "python_pip", "cuda_arch": "80"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',12,12.0,NULL,13.77948152336477605,11.98751586519425772,12.00060520666194109,0.3736576704015182604,3.811106184376615414,48000000000.0,64000000000.0,9358098890.24199867,7399608320.0,41186873344.0,85508096.0,8707419891.779100419);
diff --git a/gantry/tests/test_prediction.py b/gantry/tests/test_prediction.py
@@ -0,0 +1,113 @@
+import pytest
+
+from gantry.routes.prediction import prediction
+from gantry.tests.defs import prediction as defs
+from gantry.util.spec import parse_alloc_spec
+
+
+@pytest.fixture
+async def db_conn_inserted(db_conn):
+    """Returns a connection to a database with 5 samples inserted"""
+
+    with open("gantry/tests/sql/insert_samples.sql") as f:
+        await db_conn.executescript(f.read())
+
+    return db_conn
+
+
+async def test_exact_match(db_conn_inserted):
+    """All fields are an exact match for 5 samples in the database."""
+
+    assert (
+        await prediction.predict(db_conn_inserted, defs.NORMAL_BUILD)
+        == defs.NORMAL_PREDICTION
+    )
+
+
+async def test_expensive_variants(db_conn_inserted):
+    """
+    Tests whether the algorithm filters by expensive variants.
+    The input has been modified to prevent an exact match with
+    any of the samples.
+    """
+
+    assert (
+        await prediction.predict(db_conn_inserted, defs.EXPENSIVE_VARIANT_BUILD)
+        == defs.NORMAL_PREDICTION
+    )
+
+
+async def test_no_variant_match(db_conn_inserted):
+    """
+    All fields match except for variants, expect default predictions with no sample.
+    """
+
+    assert (
+        await prediction.predict(db_conn_inserted, defs.BAD_VARIANT_BUILD)
+        == defs.DEFAULT_PREDICTION
+    )
+
+
+async def test_partial_match(db_conn_inserted):
+    """
+    Some of the fields match, so the prediction should be based on matching
+    with other fields. In reality, we're using the same dataset but just
+    testing that the prediction will be the same with a different compiler name.
+    """
+
+    # same as NORMAL_BUILD, but with a different compiler name to test partial matching
+    diff_compiler_build = defs.NORMAL_BUILD.copy()
+    diff_compiler_build["compiler_name"] = "gcc-different"
+
+    assert (
+        await prediction.predict(db_conn_inserted, diff_compiler_build)
+        == defs.NORMAL_PREDICTION
+    )
+
+
+async def test_empty_sample(db_conn):
+    """No samples in the database, so we expect default predictions."""
+
+    assert (
+        await prediction.predict(db_conn, defs.NORMAL_BUILD) == defs.DEFAULT_PREDICTION
+    )
+
+
+# Test parse_alloc_spec
+def test_valid_spec():
+    """Tests that a valid spec is parsed correctly."""
+    assert parse_alloc_spec("[email protected] +json+native+treesitter%[email protected]") == {
+        "pkg_name": "emacs",
+        "pkg_version": "29.2",
+        "pkg_variants": '{"json": true, "native": true, "treesitter": true}',
+        "pkg_variants_dict": {"json": True, "native": True, "treesitter": True},
+        "compiler_name": "gcc",
+        "compiler_version": "12.3.0",
+    }
+
+
+def test_invalid_specs():
+    """Test a series of invalid specs"""
+
+    # not a spec
+    assert parse_alloc_spec("hi") == {}
+
+    # missing package
+    assert parse_alloc_spec("@29.2 +json+native+treesitter%[email protected]") == {}
+
+    # missing compiler
+    assert parse_alloc_spec("[email protected] +json+native+treesitter") == {}
+
+    # variants not spaced correctly
+    assert parse_alloc_spec("[email protected]+json+native+treesitter%[email protected]") == {}
+
+    # missing compiler version
+    assert parse_alloc_spec("[email protected] +json+native+treesitter%gcc@") == {}
+    assert parse_alloc_spec("[email protected] +json+native+treesitter%gcc") == {}
+
+    # missing package version
+    assert parse_alloc_spec("emacs@ +json+native+treesitter%[email protected]") == {}
+    assert parse_alloc_spec("emacs+json+native+treesitter%[email protected]") == {}
+
+    # invalid variants
+    assert parse_alloc_spec("[email protected] this_is_not_a_thing%[email protected]") == {}