Striveworks · ntlind · Aug 22, 2024 · Jul 3, 2024 · Jul 5, 2024 · Jul 5, 2024
@@ -1,4 +1,4 @@
-name: Run benchmarks on pre-existing data
+name: Run API + client benchmarks
 
 on:
   push:

@@ -1,4 +1,4 @@
-name: Unit, functional, integration tests and code coverage
+name: Run API + client code coverage report
 
 on:
   push:

@@ -0,0 +1,38 @@
+# Runs the core classification and object-detection benchmarks on every push.
+name: Run core benchmarks
+
+on:
+  push:
+    branches: "**"
+
+permissions:
+  id-token: write
+  contents: read
+
+jobs:
+  run-benchmarks:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+      - name: install core
+        run: pip install -e .
+        working-directory: ./core
+      - name: run classification benchmarks
+        run: python benchmark_script.py
+        working-directory: ./core/benchmarks/classification
+      - name: print classification results
+        # Invoke json.tool directly (not via command substitution) so a missing
+        # or malformed results.json fails the step instead of printing nothing.
+        run: python -m json.tool results.json
+        working-directory: ./core/benchmarks/classification
+      - name: run object detection benchmarks
+        run: python benchmark_script.py
+        working-directory: ./core/benchmarks/object-detection
+      - name: print object detection results
+        run: python -m json.tool results.json
+        working-directory: ./core/benchmarks/object-detection
+      # NOTE(review): no step above starts an environment; confirm this teardown is needed.
+      - run: make stop-env
@@ -0,0 +1,40 @@
+# Runs the core unit and functional test suites on every push and fails the
+# job when the combined coverage total is below 90%.
+name: Run core code coverage report
+
+on:
+  push:
+    branches: "**"
+
+permissions:
+  id-token: write
+  contents: read
+
+jobs:
+  core-tests:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: .
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+      - name: run tests and report coverage
+        run: |
+          pip install -e ".[test]"
+          COVERAGE_FILE=.coverage.functional python -m coverage run --omit "tests/*" -m pytest -v tests/functional-tests
+          COVERAGE_FILE=.coverage.unit python -m coverage run --omit "tests/*" -m pytest -v tests/unit-tests
+          python -m coverage combine
+          python -m coverage report -m
+          python -m coverage json
+          # Plain assignment (no `export`) so a failed lookup aborts the step.
+          TOTAL=$(python -c "import json;print(json.load(open('coverage.json'))['totals']['percent_covered_display'])")
+          echo "total=$TOTAL" >> "$GITHUB_ENV"
+          # Compare in Python: bash (( )) rejects a non-integer percentage.
+          if ! python -c "import sys; sys.exit(0 if float(sys.argv[1]) >= 90 else 1)" "$TOTAL"; then
+            echo "Coverage is below 90%"
+            exit 1
+          fi
+        working-directory: ./core
@@ -32,19 +32,19 @@ repos:
     rev: v1.1.376
     hooks:
       - id: pyright
-        additional_dependencies:
-          [
+        additional_dependencies: [
             "requests",
             "Pillow >= 9.1.0",
             "numpy",
+            "pandas>=2.2.2",
+            "pandas-stubs", # fixes pyright issues with pandas
             "pytest",
             "python-dotenv",
             "SQLAlchemy>=2.0",
             "fastapi[all]>=0.100.0",
             "importlib_metadata; python_version < '3.8'",
             "pydantic-settings",
             "tqdm",
-            "pandas",
             "packaging",
             "PyJWT[crypto]",
             "structlog",
@@ -57,4 +57,5 @@ repos:
             "nltk",
             "rouge_score",
             "evaluate",
+            "shapely",
           ]
diff --git a/api/valor_api/backend/metrics/classification.py b/api/valor_api/backend/metrics/classification.py
@@ -458,30 +458,34 @@ def search_datums(condition: ColumnElement[bool]):
                     else list()
                 )
                 fp = {
-                    "misclassifications": [
-                        unique_datums[datum_id] for datum_id in fp
-                    ]
-                    if fp
-                    else list()
+                    "misclassifications": (
+                        [unique_datums[datum_id] for datum_id in fp]
+                        if fp
+                        else list()
+                    )
                 }
                 tn = (
                     [unique_datums[datum_id] for datum_id in tn]
                     if tn
                     else list()
                 )
                 fn = {
-                    "misclassifications": [
-                        unique_datums[datum_id]
-                        for datum_id in fn_misclf_examples
-                    ]
-                    if fn_misclf_examples
-                    else list(),
-                    "no_predictions": [
-                        unique_datums[datum_id]
-                        for datum_id in fn_misprd_examples
-                    ]
-                    if fn_misprd_examples
-                    else list(),
+                    "misclassifications": (
+                        [
+                            unique_datums[datum_id]
+                            for datum_id in fn_misclf_examples
+                        ]
+                        if fn_misclf_examples
+                        else list()
+                    ),
+                    "no_predictions": (
+                        [
+                            unique_datums[datum_id]
+                            for datum_id in fn_misprd_examples
+                        ]
+                        if fn_misprd_examples
+                        else list()
+                    ),
                 }
 
                 detailed_pr_output[key][value][float(threshold)] = {
@@ -789,18 +793,20 @@ def _compute_roc_auc(
 
     label_keys = {key for key, _ in labels}
     return [
-        schemas.ROCAUCMetric(
-            label_key=key,
-            value=(
-                float(np.mean(label_key_to_rocauc[key]))
-                if len(label_key_to_rocauc[key]) >= 1
-                else None
-            ),
-        )
-        if (key in label_key_to_rocauc and key in predictions_label_keys)
-        else schemas.ROCAUCMetric(
-            label_key=key,
-            value=0.0,
+        (
+            schemas.ROCAUCMetric(
+                label_key=key,
+                value=(
+                    float(np.mean(label_key_to_rocauc[key]))
+                    if len(label_key_to_rocauc[key]) >= 1
+                    else None
+                ),
+            )
+            if (key in label_key_to_rocauc and key in predictions_label_keys)
+            else schemas.ROCAUCMetric(
+                label_key=key,
+                value=0.0,
+            )
         )
         for key in label_keys
     ]
@@ -997,20 +1003,18 @@ def _compute_confusion_matrices_and_metrics(
     labels: dict[int, tuple[str, str]],
     pr_curve_max_examples: int,
     metrics_to_return: list[enums.MetricType],
-) -> (
-    tuple[
-        list[schemas.ConfusionMatrix],
-        list[
-            schemas.AccuracyMetric
-            | schemas.ROCAUCMetric
-            | schemas.PrecisionMetric
-            | schemas.RecallMetric
-            | schemas.F1Metric
-            | schemas.PrecisionRecallCurve
-            | schemas.DetailedPrecisionRecallCurve
-        ],
-    ]
-):
+) -> tuple[
+    list[schemas.ConfusionMatrix],
+    list[
+        schemas.AccuracyMetric
+        | schemas.ROCAUCMetric
+        | schemas.PrecisionMetric
+        | schemas.RecallMetric
+        | schemas.F1Metric
+        | schemas.PrecisionRecallCurve
+        | schemas.DetailedPrecisionRecallCurve
+    ],
+]:
     """
     Computes the confusion matrix and all metrics for a given label key.
 

@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Striveworks
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.