Commit
add tests for prediction API
cmelone committed Mar 7, 2024
1 parent 31a0aa1 commit 3c8bc2c
Showing 3 changed files with 181 additions and 0 deletions.
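These are standard pytest-style async tests; assuming a pytest-asyncio (or equivalent) setup, which the async test functions suggest, the new test module can presumably be run with something like:

pytest gantry/tests/test_prediction.py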
64 changes: 64 additions & 0 deletions gantry/tests/defs/prediction.py
@@ -0,0 +1,64 @@
# flake8: noqa
# fmt: off

NORMAL_BUILD = {
    "hash": "testing",
    "package": {
        "name": "py-torch",
        "version": "2.2.1",
        "variants": "~caffe2+cuda+cudnn~debug+distributed+fbgemm+gloo+kineto~magma~metal+mkldnn+mpi~nccl+nnpack+numa+numpy+onnx_ml+openmp+qnnpack~rocm+tensorpipe~test+valgrind+xnnpack build_system=python_pip cuda_arch=80",
    },
    "compiler": {
        "name": "gcc",
        "version": "11.4.0",
    },
}

# same variants as NORMAL_BUILD["package"]["variants"], but with build_system=python_pip removed,
# in order to test the expensive variants filter
EXPENSIVE_VARIANT_BUILD = {
    "hash": "testing",
    "package": {
        "name": "py-torch",
        "version": "2.2.1",
        "variants": "~caffe2+cuda+cudnn~debug+distributed+fbgemm+gloo+kineto~magma~metal+mkldnn+mpi~nccl+nnpack+numa+numpy+onnx_ml+openmp+qnnpack~rocm+tensorpipe~test+valgrind+xnnpack cuda_arch=80",
    },
    "compiler": {
        "name": "gcc",
        "version": "11.4.0",
    },
}

# no variants should match this, so we expect the default prediction
BAD_VARIANT_BUILD = {
    "hash": "testing",
    "package": {
        "name": "py-torch",
        "version": "2.2.1",
        "variants": "+no~expensive~variants+match",
    },
    "compiler": {
        "name": "gcc",
        "version": "11.4.0",
    },
}

# calculated by running the baseline prediction algorithm on the sample data
# in gantry/tests/sql/insert_samples.sql
NORMAL_PREDICTION = {
    "hash": "testing",
    "variables": {
        "KUBERNETES_CPU_REQUEST": "12",
        "KUBERNETES_MEMORY_REQUEST": "9576M",
    },
}


# this is what will get returned when there are no samples in the database
# that match what the client wants
DEFAULT_PREDICTION = {
    "hash": "testing",
    "variables": {
        "KUBERNETES_CPU_REQUEST": "1",
        "KUBERNETES_MEMORY_REQUEST": "2000M",
    },
}
6 changes: 6 additions & 0 deletions gantry/tests/sql/insert_samples.sql
@@ -0,0 +1,6 @@
INSERT INTO nodes VALUES(6789,'ec2c47a0-7e9b-cfa3-9ad4-ac227ade598d','ip-192-168-202-150.ec2.internal',32.0,131072000000.0,'amd64','linux','m5.8xlarge');
INSERT INTO jobs VALUES(6781,'runner-2j2ndhxu-project-2-concurrent-0-nbogpypi1',6789,1708919572.983000041,1708924744.811000108,101502092,'success','develop','py-torch','2.2.1','{"caffe2": false, "cuda": true, "cudnn": true, "debug": false, "distributed": true, "fbgemm": true, "gloo": true, "kineto": true, "magma": false, "metal": false, "mkldnn": true, "mpi": true, "nccl": false, "nnpack": true, "numa": true, "numpy": true, "onnx_ml": true, "openmp": true, "qnnpack": true, "rocm": false, "tensorpipe": true, "test": false, "valgrind": true, "xnnpack": true, "build_system": "python_pip", "cuda_arch": "80"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',12,12.0,NULL,9.77948152336477605,11.98751586519425772,12.00060520666194109,0.3736576704015182604,3.811106184376615414,48000000000.0,64000000000.0,9652098890.24199867,7399608320.0,41186873344.0,85508096.0,8707419891.779100419);
INSERT INTO jobs VALUES(6782,'runner-2j2ndhxu-project-2-concurrent-0-nbogpypi2',6789,1708919572.983000041,1708924744.811000108,101502093,'success','develop','py-torch','2.2.1','{"caffe2": false, "cuda": true, "cudnn": true, "debug": false, "distributed": true, "fbgemm": true, "gloo": true, "kineto": true, "magma": false, "metal": false, "mkldnn": true, "mpi": true, "nccl": false, "nnpack": true, "numa": true, "numpy": true, "onnx_ml": true, "openmp": true, "qnnpack": true, "rocm": false, "tensorpipe": true, "test": false, "valgrind": true, "xnnpack": true, "build_system": "python_pip", "cuda_arch": "80"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',12,12.0,NULL,10.77948152336477605,11.98751586519425772,12.00060520666194109,0.3736576704015182604,3.811106184376615414,48000000000.0,64000000000.0,9958098890.24199867,7399608320.0,41186873344.0,85508096.0,8707419891.779100419);
INSERT INTO jobs VALUES(6783,'runner-2j2ndhxu-project-2-concurrent-0-nbogpypi3',6789,1708919572.983000041,1708924744.811000108,101502094,'success','develop','py-torch','2.2.1','{"caffe2": false, "cuda": true, "cudnn": true, "debug": false, "distributed": true, "fbgemm": true, "gloo": true, "kineto": true, "magma": false, "metal": false, "mkldnn": true, "mpi": true, "nccl": false, "nnpack": true, "numa": true, "numpy": true, "onnx_ml": true, "openmp": true, "qnnpack": true, "rocm": false, "tensorpipe": true, "test": false, "valgrind": true, "xnnpack": true, "build_system": "python_pip", "cuda_arch": "80"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',12,12.0,NULL,11.77948152336477605,11.98751586519425772,12.00060520666194109,0.3736576704015182604,3.811106184376615414,48000000000.0,64000000000.0,9158098890.24199867,7399608320.0,41186873344.0,85508096.0,8707419891.779100419);
INSERT INTO jobs VALUES(6784,'runner-2j2ndhxu-project-2-concurrent-0-nbogpypi4',6789,1708919572.983000041,1708924744.811000108,101502095,'success','develop','py-torch','2.2.1','{"caffe2": false, "cuda": true, "cudnn": true, "debug": false, "distributed": true, "fbgemm": true, "gloo": true, "kineto": true, "magma": false, "metal": false, "mkldnn": true, "mpi": true, "nccl": false, "nnpack": true, "numa": true, "numpy": true, "onnx_ml": true, "openmp": true, "qnnpack": true, "rocm": false, "tensorpipe": true, "test": false, "valgrind": true, "xnnpack": true, "build_system": "python_pip", "cuda_arch": "80"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',12,12.0,NULL,12.77948152336477605,11.98751586519425772,12.00060520666194109,0.3736576704015182604,3.811106184376615414,48000000000.0,64000000000.0,9758098890.24199867,7399608320.0,41186873344.0,85508096.0,8707419891.779100419);
INSERT INTO jobs VALUES(6785,'runner-2j2ndhxu-project-2-concurrent-0-nbogpypi5',6789,1708919572.983000041,1708924744.811000108,101502096,'success','develop','py-torch','2.2.1','{"caffe2": false, "cuda": true, "cudnn": true, "debug": false, "distributed": true, "fbgemm": true, "gloo": true, "kineto": true, "magma": false, "metal": false, "mkldnn": true, "mpi": true, "nccl": false, "nnpack": true, "numa": true, "numpy": true, "onnx_ml": true, "openmp": true, "qnnpack": true, "rocm": false, "tensorpipe": true, "test": false, "valgrind": true, "xnnpack": true, "build_system": "python_pip", "cuda_arch": "80"}','gcc','11.4.0','linux-ubuntu20.04-x86_64_v3','e4s',12,12.0,NULL,13.77948152336477605,11.98751586519425772,12.00060520666194109,0.3736576704015182604,3.811106184376615414,48000000000.0,64000000000.0,9358098890.24199867,7399608320.0,41186873344.0,85508096.0,8707419891.779100419);
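For orientation, and as an inference from the numbers rather than something stated in this diff: the NORMAL_PREDICTION values above are consistent with the baseline simply averaging the per-job mean CPU and memory usage across these five samples, then expressing memory in megabytes. A rough sanity check in Python, with the per-job means copied (and truncated) from the rows above:

# Assumption: these are the per-job cpu_mean and mem_mean columns from the five rows above.
cpu_means = [9.7795, 10.7795, 11.7795, 12.7795, 13.7795]
mem_means = [9652098890.24, 9958098890.24, 9158098890.24, 9758098890.24, 9358098890.24]

cpu = sum(cpu_means) / len(cpu_means)        # ~11.78, which rounds to 12
mem = sum(mem_means) / len(mem_means) / 1e6  # ~9576.9 MB, truncated to "9576M"

print(round(cpu), f"{int(mem)}M")  # 12 9576M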
111 changes: 111 additions & 0 deletions gantry/tests/test_prediction.py
@@ -0,0 +1,111 @@
import copy

import pytest

from gantry.routes.prediction import prediction
from gantry.tests.defs import prediction as defs
from gantry.util.prediction import validate_payload


@pytest.fixture
async def db_conn_inserted(db_conn):
"""Returns a connection to a database with 5 samples inserted"""

with open("gantry/tests/sql/insert_samples.sql") as f:
await db_conn.executescript(f.read())

return db_conn


async def test_exact_match(db_conn_inserted):
"""All fields are an exact match for 5 samples in the database."""

assert (
await prediction.predict_single(db_conn_inserted, defs.NORMAL_BUILD)
== defs.NORMAL_PREDICTION
)


async def test_expensive_variants(db_conn_inserted):
"""
Tests whether the algorithm filters by expensive variants.
The input has been modified to prevent an exact match with
any of the samples.
"""

assert (
await prediction.predict_single(db_conn_inserted, defs.EXPENSIVE_VARIANT_BUILD)
== defs.NORMAL_PREDICTION
)


async def test_no_variant_match(db_conn_inserted):
"""
All fields match except for variants, expect default predictions with no sample.
"""

assert (
await prediction.predict_single(db_conn_inserted, defs.BAD_VARIANT_BUILD)
== defs.DEFAULT_PREDICTION
)


async def test_partial_match(db_conn_inserted):
"""
Some of the fields match, so the prediction should be based on matching
with other fields. In reality, we're using the same dataset but just
testing that the prediction will be the same with a different compiler name.
"""

# same as NORMAL_BUILD, but with a different compiler name to test partial matching
diff_compiler_build = defs.NORMAL_BUILD.copy()
diff_compiler_build["compiler"]["name"] = "gcc-different"

assert (
await prediction.predict_single(db_conn_inserted, diff_compiler_build)
== defs.NORMAL_PREDICTION
)


async def test_empty_sample(db_conn):
"""No samples in the database, so we expect default predictions."""

assert (
await prediction.predict_single(db_conn, defs.NORMAL_BUILD)
== defs.DEFAULT_PREDICTION
)


# Test validate_payload


def test_valid_payload():
"""Tests that a valid payload returns True"""
assert validate_payload(defs.NORMAL_BUILD) is True


def test_invalid_payloads():
"""Test a series of invalid payloads"""

# non dict
assert validate_payload("hi") is False

build = defs.NORMAL_BUILD.copy()
# missing package
del build["package"]
assert validate_payload(build) is False

build = defs.NORMAL_BUILD.copy()
# missing compiler
del build["compiler"]
assert validate_payload(build) is False

# name and version are strings in the package and compiler
for key in ["name", "version"]:
for field in ["package", "compiler"]:
build = defs.NORMAL_BUILD.copy()
build[field][key] = 123
assert validate_payload(build) is False

# invalid variants
build = defs.NORMAL_BUILD.copy()
build["package"]["variants"] = "+++++"
assert validate_payload(build) is False
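
For readers without the gantry source at hand, a minimal validate_payload consistent with the checks exercised above might look like the following. This is a sketch under assumptions (in particular the variant-token regex), not gantry's actual implementation:

import re

# Spack-style variant tokens: runs of +name/~name flags, or key=value pairs (assumed format)
_VARIANT_TOKEN = re.compile(r"^(?:(?:[+~][A-Za-z0-9_.-]+)+|[A-Za-z0-9_-]+=[^\s]+)$")


def validate_payload(build) -> bool:
    """Return True if the build payload has the shape the prediction API expects."""
    if not isinstance(build, dict):
        return False

    for field in ("package", "compiler"):
        section = build.get(field)
        if not isinstance(section, dict):
            return False
        # name and version must both be strings
        for key in ("name", "version"):
            if not isinstance(section.get(key), str):
                return False

    # every whitespace-separated variant token must be well formed
    variants = build["package"].get("variants", "")
    return all(_VARIANT_TOKEN.match(token) for token in variants.split())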
