Skip to content

Commit

Permalink
create-docker-image (#405)
Browse files Browse the repository at this point in the history
* testing workflow

* testing workflow

* testing workflow

* testing workflow

* testing workflow

* testing workflow

* testing workflow

* testing workflow

* testing workflow

* testing workflow

* testing workflow

* this should be it, thank goodness

* use new modelgauge without extras; bump version; fix tests

* update lock file

* add .dockerignore

* add .dockerignore
  • Loading branch information
dhosterman authored Aug 12, 2024
1 parent bed7ee4 commit 59110d4
Show file tree
Hide file tree
Showing 7 changed files with 567 additions and 1,007 deletions.
9 changes: 9 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Paths excluded from the Docker build context.
dist
run
embed
web
tests
docs
.github
.venv
config
33 changes: 33 additions & 0 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Builds a multi-architecture Docker image and pushes it to the GitHub
# Container Registry (ghcr.io) each time a release is published.
name: Build and Publish Docker Image

on:
  release:
    types: [published]

jobs:
  docker:
    runs-on: ubuntu-latest
    # Declare the token scopes explicitly: pushing to ghcr.io with
    # GITHUB_TOKEN requires packages: write, which is not granted when the
    # repository's default workflow permissions are read-only.
    permissions:
      contents: read
      packages: write
    steps:
      # QEMU provides the emulation needed to build the non-native platform
      # listed under "platforms" below.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # No checkout step: build-push-action builds from the Git context of
      # the triggering ref by default.
      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          push: true
          # github.ref_name is the short tag name (e.g. "v0.6.0").
          # github.ref would expand to "refs/tags/v0.6.0", and the slashes
          # make that an invalid Docker tag.
          tags: |
            ghcr.io/${{ github.repository }}:latest
            ghcr.io/${{ github.repository }}:${{ github.ref_name }}
          platforms: |
            linux/arm64/v8
            linux/amd64
36 changes: 36 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Multi-stage build for the modelbench CLI image.
#   base    - shared Python runtime settings and working directory
#   builder - installs Poetry, resolves dependencies into /venv, builds the wheel
#   final   - copies /venv and the built wheel, installs it, exposes the CLI

# Base Stage
FROM python:3.10-slim AS base

ENV PYTHONFAULTHANDLER=1 \
    PYTHONHASHSEED=random \
    PYTHONUNBUFFERED=1

WORKDIR /app

# Build Stage
FROM base AS builder

ENV PIP_DEFAULT_TIMEOUT=100 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_CACHE_DIR=1 \
    POETRY_VERSION=1.8.3

RUN pip install "poetry==$POETRY_VERSION"
RUN python -m venv /venv

# Copy only the dependency manifests first so the dependency-install layer
# stays cached until pyproject.toml or poetry.lock changes.
COPY pyproject.toml poetry.lock ./
RUN . /venv/bin/activate && poetry install --without=dev --no-root --no-interaction --no-ansi

COPY . .
RUN . /venv/bin/activate && poetry build

# Final Stage
FROM base AS final

WORKDIR /app

COPY --from=builder /venv /venv
COPY --from=builder /app/dist .

# PIP_NO_CACHE_DIR is set only in the builder stage, so disable the pip
# cache explicitly here.
RUN . /venv/bin/activate && pip install --no-cache-dir *.whl

# Keep the executable in ENTRYPOINT and the default argument in CMD so that
# `docker run <image>` still prints help, while `docker run <image> <args>`
# replaces the default instead of being appended after a hard-coded --help.
ENTRYPOINT ["/venv/bin/modelbench"]
CMD ["--help"]
1,456 changes: 468 additions & 988 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ requires-python = ">=3.10, <3.13"

[tool.poetry]
name = "modelbench"
version = "0.5.1"
version = "0.6.0"
description = "Run benchmarks and generate reports measuring the behavior of many AI Systems."
license = "Apache-2.0"
authors = ["MLCommons AI Safety <[email protected]>"]
Expand Down Expand Up @@ -47,7 +47,7 @@ packages = [

[tool.poetry.dependencies]
python = ">=3.10,<3.13"
modelgauge = { version = ">=0.6.0", extras = ["perspective_api", "standard_tests"] }
modelgauge = ">=0.6.0"
jq = "^1.6.0"
click = "^8.1.7"
casefy = "^0.1.7"
Expand Down
4 changes: 2 additions & 2 deletions tests/test_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def test_text_grade(self, probability, expected, hazard_score):


def test_hazard_definition_basics(fake_secrets):
import modelgauge.tests.bbq
import modelgauge.tests.safe

h = SafeCaeHazard()
assert h.uid == "safe_cae_hazard-0.5"
Expand All @@ -124,7 +124,7 @@ def test_hazard_definition_basics(fake_secrets):


def test_hazard_v1_definition_basics(fake_secrets):
import modelgauge.tests.bbq
import modelgauge.tests.safe

h = SafeDfmHazardV1()
assert h.uid == "safe_dfm_hazard_v_1-1.0"
Expand Down
32 changes: 17 additions & 15 deletions tests/test_static_site_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,13 @@
from unittest.mock import MagicMock
from unittest.mock import patch

from modelgauge.tests.bbq import BBQ


import pytest

from modelbench.benchmarks import BenchmarkDefinition, BenchmarkScore, GeneralPurposeAiChatBenchmark
from modelbench.hazards import HazardScore, SafeCaeHazard, SafeCbrHazard, SafeHazard
from modelbench.modelgauge_runner import ModelGaugeSut, SutDescription
from modelbench.benchmarks import (
BenchmarkDefinition,
GeneralPurposeAiChatBenchmark,
BenchmarkScore,
)
from modelbench.scoring import ValueEstimate
from modelbench.hazards import HazardScore, SafeCaeHazard, SafeCbrHazard, SafeHazard
from modelbench.static_site_generator import HazardScorePositions, StaticContent, StaticSiteGenerator
from modelbench.static_site_generator import HazardScorePositions, StaticSiteGenerator


@pytest.fixture()
Expand Down Expand Up @@ -107,6 +100,16 @@ class TestObjectContentKeysExist:
Tests to ensure that appropriate presentation-layer content exists for objects that are added to modelbench.
"""

@pytest.fixture
def fake_test(self):
from modelgauge.base_test import BaseTest

class FakeTest(BaseTest):
def __init__(self, uid):
self.uid = uid

return FakeTest

@pytest.fixture
def ssg(self):
_ssg = StaticSiteGenerator()
Expand Down Expand Up @@ -198,14 +201,13 @@ def test_safe_hazard_definitions(self, ssg, hazard, required_template_content_ke
for key in required_template_content_keys["SafeHazard"]:
assert ssg.content(hazard(), key)

def test_tests(self, ssg):
def test_tests(self, ssg, fake_test):
# todo: This is the most naive version of this test, but we'll want a way to check all of the tests modelbench cares about at some point

test = BBQ(uid="bbq")
test = fake_test(uid="bbq")
assert ssg.content(test, "display_name") == "BBQ: Bias Benchmark for QA"

def test_test_defaults(self, ssg):
test = BBQ(uid="not_a_real_uid")
def test_test_defaults(self, ssg, fake_test):
test = fake_test(uid="not_a_real_uid")
assert ssg.content(test, "display_name") == "not_a_real_uid"
assert ssg.content(test, "not_a_real_key") == ""

Expand Down

0 comments on commit 59110d4

Please sign in to comment.